R Markdown
# NOTE(review): hard-coded, machine-specific working directory. The relative
# path given to read_excel() further down is resolved against it, so this
# script only runs as-is on a machine that has this folder.
setwd("/Users/ingridellis/Desktop/CJS 310/Week 3")
# readxl supplies read_excel(); tidyverse attaches dplyr and ggplot2 (among
# others), which the rest of the analysis relies on.
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Import the 2018 UCR workbook; the path is relative to the working directory.
X2018_UCR_PA <- read_excel(path = "2018.UCR.PA.xlsx")
# Print the first six rows to confirm the import worked.
head(X2018_UCR_PA, n = 6)
## # A tibble: 6 × 12
## City Population `Violent\r\ncrime` Murder and\r\nnonneg…¹ Rape Robbery
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Abington T… 55631 44 1 6 12
## 2 Adamstown 1857 3 0 0 0
## 3 Adams Town… 14105 3 0 0 0
## 4 Adams Town… 5581 0 0 0 0
## 5 Akron 4015 7 0 1 0
## 6 Albion 1466 0 0 0 0
## # ℹ abbreviated name: ¹​`Murder and\r\nnonnegligent\r\nmanslaughter`
## # ℹ 6 more variables: `Aggravated\r\nassault` <dbl>, `Property\r\ncrime` <dbl>,
## # Burglary <dbl>, `Larceny-\r\ntheft` <dbl>,
## # `Motor\r\nvehicle\r\ntheft` <dbl>, Arson <dbl>
# Bind the imported tibble to a second name, df_orig; the cleaned data frame
# (df) is derived from it later on.
df_orig <- X2018_UCR_PA
# Preview the first rows of df_orig.
head(df_orig)
## # A tibble: 6 × 12
## City Population `Violent\r\ncrime` Murder and\r\nnonneg…¹ Rape Robbery
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Abington T… 55631 44 1 6 12
## 2 Adamstown 1857 3 0 0 0
## 3 Adams Town… 14105 3 0 0 0
## 4 Adams Town… 5581 0 0 0 0
## 5 Akron 4015 7 0 1 0
## 6 Albion 1466 0 0 0 0
## # ℹ abbreviated name: ¹​`Murder and\r\nnonnegligent\r\nmanslaughter`
## # ℹ 6 more variables: `Aggravated\r\nassault` <dbl>, `Property\r\ncrime` <dbl>,
## # Burglary <dbl>, `Larceny-\r\ntheft` <dbl>,
## # `Motor\r\nvehicle\r\ntheft` <dbl>, Arson <dbl>
# List the files in the current working directory.
list.files()
## [1] "2018.UCR.PA.xlsx"
## [2] "502b78b2-7b33-43e4-bcde-63ffd1943dc6.png"
## [3] "Week 3 Assignment.md"
## [4] "Week 3 Assignment.Rmd"
## [5] "Week-3-Assignment.Rmd"
## [6] "Week3Classwork.R"
# Per-column summary statistics of the imported data, before any renaming.
summary(df_orig)
## City Population Violent\r\ncrime
## Length:989 Min. : 132 Min. : 0.00
## Class :character 1st Qu.: 2066 1st Qu.: 1.00
## Mode :character Median : 4320 Median : 5.00
## Mean : 10054 Mean : 34.16
## 3rd Qu.: 9088 3rd Qu.: 15.00
## Max. :1586916 Max. :14420.00
## Murder and\r\nnonnegligent\r\nmanslaughter Rape
## Min. : 0.0000 Min. : 0.000
## 1st Qu.: 0.0000 1st Qu.: 0.000
## Median : 0.0000 Median : 0.000
## Mean : 0.6977 Mean : 2.971
## 3rd Qu.: 0.0000 3rd Qu.: 1.000
## Max. :351.0000 Max. :1095.000
## Robbery Aggravated\r\nassault Property\r\ncrime Burglary
## Min. : 0.000 Min. : 0.00 Min. : 0.0 Min. : 0.00
## 1st Qu.: 0.000 1st Qu.: 1.00 1st Qu.: 9.0 1st Qu.: 1.00
## Median : 0.000 Median : 4.00 Median : 40.0 Median : 5.00
## Mean : 9.449 Mean : 21.05 Mean : 164.6 Mean : 21.42
## 3rd Qu.: 2.000 3rd Qu.: 11.00 3rd Qu.: 105.0 3rd Qu.: 12.00
## Max. :5262.000 Max. :7712.00 Max. :49145.0 Max. :6497.00
## Larceny-\r\ntheft Motor\r\nvehicle\r\ntheft Arson
## Min. : 0.0 Min. : 0.00 Min. : 0.000
## 1st Qu.: 7.0 1st Qu.: 0.00 1st Qu.: 0.000
## Median : 32.0 Median : 1.00 Median : 0.000
## Mean : 131.3 Mean : 11.84 Mean : 1.147
## 3rd Qu.: 89.0 3rd Qu.: 4.00 3rd Qu.: 0.000
## Max. :36968.0 Max. :5680.00 Max. :430.000
### Renaming the variables
# The spreadsheet headers contain embedded "\r\n" line breaks; swap them for
# short dotted names in a single rename() call (new name = old name).
df <- rename(
  df_orig,
  violent.crime       = "Violent\r\ncrime",
  murder.manslaughter = "Murder and\r\nnonnegligent\r\nmanslaughter",
  aggravated.assault  = "Aggravated\r\nassault",
  property.crime      = "Property\r\ncrime",
  larceny.theft       = "Larceny-\r\ntheft",
  motor.theft         = "Motor\r\nvehicle\r\ntheft"
)
# Confirm the new column names.
summary(df)
## City Population violent.crime murder.manslaughter
## Length:989 Min. : 132 Min. : 0.00 Min. : 0.0000
## Class :character 1st Qu.: 2066 1st Qu.: 1.00 1st Qu.: 0.0000
## Mode :character Median : 4320 Median : 5.00 Median : 0.0000
## Mean : 10054 Mean : 34.16 Mean : 0.6977
## 3rd Qu.: 9088 3rd Qu.: 15.00 3rd Qu.: 0.0000
## Max. :1586916 Max. :14420.00 Max. :351.0000
## Rape Robbery aggravated.assault property.crime
## Min. : 0.000 Min. : 0.000 Min. : 0.00 Min. : 0.0
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 1.00 1st Qu.: 9.0
## Median : 0.000 Median : 0.000 Median : 4.00 Median : 40.0
## Mean : 2.971 Mean : 9.449 Mean : 21.05 Mean : 164.6
## 3rd Qu.: 1.000 3rd Qu.: 2.000 3rd Qu.: 11.00 3rd Qu.: 105.0
## Max. :1095.000 Max. :5262.000 Max. :7712.00 Max. :49145.0
## Burglary larceny.theft motor.theft Arson
## Min. : 0.00 Min. : 0.0 Min. : 0.00 Min. : 0.000
## 1st Qu.: 1.00 1st Qu.: 7.0 1st Qu.: 0.00 1st Qu.: 0.000
## Median : 5.00 Median : 32.0 Median : 1.00 Median : 0.000
## Mean : 21.42 Mean : 131.3 Mean : 11.84 Mean : 1.147
## 3rd Qu.: 12.00 3rd Qu.: 89.0 3rd Qu.: 4.00 3rd Qu.: 0.000
## Max. :6497.00 Max. :36968.0 Max. :5680.00 Max. :430.000
### Adding a crime rate variable: violent plus property crimes, divided by the
### population of each city and multiplied by 100,000
df <- mutate(
  df,
  crime.rate = ((violent.crime + property.crime) / Population) * 100000
)
# The summary now includes the new crime.rate column.
summary(df)
## City Population violent.crime murder.manslaughter
## Length:989 Min. : 132 Min. : 0.00 Min. : 0.0000
## Class :character 1st Qu.: 2066 1st Qu.: 1.00 1st Qu.: 0.0000
## Mode :character Median : 4320 Median : 5.00 Median : 0.0000
## Mean : 10054 Mean : 34.16 Mean : 0.6977
## 3rd Qu.: 9088 3rd Qu.: 15.00 3rd Qu.: 0.0000
## Max. :1586916 Max. :14420.00 Max. :351.0000
## Rape Robbery aggravated.assault property.crime
## Min. : 0.000 Min. : 0.000 Min. : 0.00 Min. : 0.0
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 1.00 1st Qu.: 9.0
## Median : 0.000 Median : 0.000 Median : 4.00 Median : 40.0
## Mean : 2.971 Mean : 9.449 Mean : 21.05 Mean : 164.6
## 3rd Qu.: 1.000 3rd Qu.: 2.000 3rd Qu.: 11.00 3rd Qu.: 105.0
## Max. :1095.000 Max. :5262.000 Max. :7712.00 Max. :49145.0
## Burglary larceny.theft motor.theft Arson
## Min. : 0.00 Min. : 0.0 Min. : 0.00 Min. : 0.000
## 1st Qu.: 1.00 1st Qu.: 7.0 1st Qu.: 0.00 1st Qu.: 0.000
## Median : 5.00 Median : 32.0 Median : 1.00 Median : 0.000
## Mean : 21.42 Mean : 131.3 Mean : 11.84 Mean : 1.147
## 3rd Qu.: 12.00 3rd Qu.: 89.0 3rd Qu.: 4.00 3rd Qu.: 0.000
## Max. :6497.00 Max. :36968.0 Max. :5680.00 Max. :430.000
## crime.rate
## Min. : 0.0
## 1st Qu.: 456.3
## Median : 904.8
## Mean : 1261.1
## 3rd Qu.: 1701.4
## Max. :17757.0
### Crime rate for each city
# Keep only the city name and its crime rate.
selected.ucr <- select(df, City, crime.rate)
head(selected.ucr)
## # A tibble: 6 × 2
## City crime.rate
## <chr> <dbl>
## 1 Abington Township, Montgomery County 1785.
## 2 Adamstown 915.
## 3 Adams Township, Butler County 347.
## 4 Adams Township, Cambria County 197.
## 5 Akron 1021.
## 6 Albion 887.
### Sorted so the cities with the highest crime rates are at the top
# arrange() reorders rows without dropping any; desc() puts the largest first.
ordered.data <- arrange(selected.ucr, desc(crime.rate))
head(ordered.data)
## # A tibble: 6 × 2
## City crime.rate
## <chr> <dbl>
## 1 Wilkes-Barre Township 17757.
## 2 Frazer Township 14425.
## 3 Eddystone 10904.
## 4 Homestead 9962.
## 5 Southwest Regional, Washington County 6818.
## 6 Muncy Township 6679.
### Categorizing towns by population
# Cut points between population bands; the last band is open-ended (Inf).
breaks <- c(0, 10000, 50000, 100000, 500000, Inf)
# One label per band, in the same order as the intervals defined by `breaks`.
labels <- c("Small", "Medium", "Large", "Very Large", "Metropolitan")
# Add 'population.category': each city's Population is binned with cut().
# include.lowest = TRUE makes the lowest break (0) part of the first band.
df <- mutate(
  df,
  population.category = cut(
    Population,
    breaks = breaks,
    labels = labels,
    include.lowest = TRUE
  )
)
# Number of cities in each band.
summary(df$population.category)
## Small Medium Large Very Large Metropolitan
## 763 209 14 2 1
### Average crime rate and total number of crimes for each population category
# avg.crime.rate: mean of the per-city rates (per 100,000 residents).
# total.crimes:   sum of the underlying crime counts (violent + property), the
#                 same counts crime.rate is built from. Summing crime.rate
#                 itself would add up per-100,000 rates, which is not a count.
# NOTE: re-knit the document so the printed total.crimes column shows counts.
crime.table <- df %>%
  group_by(population.category) %>%
  summarize(
    avg.crime.rate = mean(crime.rate, na.rm = TRUE),
    total.crimes = sum(violent.crime + property.crime, na.rm = TRUE)
  )
crime.table
## # A tibble: 5 × 3
## population.category avg.crime.rate total.crimes
## <fct> <dbl> <dbl>
## 1 Small 1184. 903674.
## 2 Medium 1466. 306468.
## 3 Large 1917. 26833.
## 4 Very Large 3125. 6250.
## 5 Metropolitan 4006. 4006.
### Create a graph to see the distribution of crime rates across Pennsylvania cities
# Histogram of per-city crime rates; each bar is shaded by its own bin count.
# after_stat(count) replaces the `..count..` notation deprecated in
# ggplot2 3.4.0.
crime.rate.table <- df %>%
  ggplot(aes(x = crime.rate, fill = after_stat(count))) +
  # bins = 30 is what stat_bin() used implicitly; stating it keeps the plot
  # unchanged and silences the "using `bins = 30`" message.
  geom_histogram(bins = 30) +
  labs(
    x = "Crime rates",
    y = "Frequency",
    title = "Distribution of Crime Rates"
  ) +
  theme_minimal()
# Printing the plot object renders it.
crime.rate.table
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
