library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Load the Guns data from the CSV hosted on GitHub
url <- "https://raw.githubusercontent.com/josh1den/MSDS-R-Bridge/main/Guns.csv"
GunData <- read.csv(file = url)
# List the column names of the loaded data frame
names(GunData)
## [1] "X" "year" "violent" "murder" "robbery"
## [6] "prisoners" "afam" "cauc" "male" "population"
## [11] "income" "density" "state" "law"
# List each distinct value found in the state column
unique(GunData[["state"]])
## [1] "Alabama" "Alaska" "Arizona"
## [4] "Arkansas" "California" "Colorado"
## [7] "Connecticut" "Delaware" "District of Columbia"
## [10] "Florida" "Georgia" "Hawaii"
## [13] "Idaho" "Illinois" "Indiana"
## [16] "Iowa" "Kansas" "Kentucky"
## [19] "Louisiana" "Maine" "Maryland"
## [22] "Massachusetts" "Michigan" "Minnesota"
## [25] "Mississippi" "Missouri" "Montana"
## [28] "Nebraska" "Nevada" "New Hampshire"
## [31] "New Jersey" "New Mexico" "New York"
## [34] "North Carolina" "North Dakota" "Ohio"
## [37] "Oklahoma" "Oregon" "Pennsylvania"
## [40] "Rhode Island" "South Carolina" "South Dakota"
## [43] "Tennessee" "Texas" "Utah"
## [46] "Vermont" "Virginia" "Washington"
## [49] "West Virginia" "Wisconsin" "Wyoming"
# Show each column's type together with its first few values
utils::str(GunData)
## 'data.frame': 1173 obs. of 14 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ year : int 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 ...
## $ violent : num 414 419 413 448 470 ...
## $ murder : num 14.2 13.3 13.2 13.2 11.9 10.6 9.2 9.4 9.8 10.1 ...
## $ robbery : num 96.8 99.1 109.5 132.1 126.5 ...
## $ prisoners : int 83 94 144 141 149 183 215 243 256 267 ...
## $ afam : num 8.38 8.35 8.33 8.41 8.48 ...
## $ cauc : num 55.1 55.1 55.1 54.9 54.9 ...
## $ male : num 18.2 18 17.8 17.7 17.7 ...
## $ population: num 3.78 3.83 3.87 3.9 3.92 ...
## $ income : num 9563 9932 9877 9541 9548 ...
## $ density : num 0.0746 0.0756 0.0762 0.0768 0.0772 ...
## $ state : chr "Alabama" "Alabama" "Alabama" "Alabama" ...
## $ law : chr "no" "no" "no" "no" ...
# Print per-column summary statistics for the full data set
summary(object = GunData)
## X year violent murder
## Min. : 1 Min. :1977 Min. : 47.0 Min. : 0.200
## 1st Qu.: 294 1st Qu.:1982 1st Qu.: 283.1 1st Qu.: 3.700
## Median : 587 Median :1988 Median : 443.0 Median : 6.400
## Mean : 587 Mean :1988 Mean : 503.1 Mean : 7.665
## 3rd Qu.: 880 3rd Qu.:1994 3rd Qu.: 650.9 3rd Qu.: 9.800
## Max. :1173 Max. :1999 Max. :2921.8 Max. :80.600
## robbery prisoners afam cauc
## Min. : 6.4 Min. : 19.0 Min. : 0.2482 Min. :21.78
## 1st Qu.: 71.1 1st Qu.: 114.0 1st Qu.: 2.2022 1st Qu.:59.94
## Median : 124.1 Median : 187.0 Median : 4.0262 Median :65.06
## Mean : 161.8 Mean : 226.6 Mean : 5.3362 Mean :62.95
## 3rd Qu.: 192.7 3rd Qu.: 291.0 3rd Qu.: 6.8507 3rd Qu.:69.20
## Max. :1635.1 Max. :1913.0 Max. :26.9796 Max. :76.53
## male population income density
## Min. :12.21 Min. : 0.4027 Min. : 8555 Min. : 0.000707
## 1st Qu.:14.65 1st Qu.: 1.1877 1st Qu.:11935 1st Qu.: 0.031911
## Median :15.90 Median : 3.2713 Median :13402 Median : 0.081569
## Mean :16.08 Mean : 4.8163 Mean :13725 Mean : 0.352038
## 3rd Qu.:17.53 3rd Qu.: 5.6856 3rd Qu.:15271 3rd Qu.: 0.177718
## Max. :22.35 Max. :33.1451 Max. :23647 Max. :11.102120
## state law
## Length:1173 Length:1173
## Class :character Class :character
## Mode :character Mode :character
##
##
##
Many of the columns have outliers (violent, murder, robbery, prisoners, afam, population).
Because `law` is a categorical (yes/no) column, the summary statistics alone do not show how the laws relate to crime rates.
# Compare the spread of the three crime-rate columns side by side.
# The columns are selected by name rather than by position (3:5) so the plot
# does not silently change if the column order of the CSV changes, e.g. if the
# leading row-index column "X" is absent.
boxplot(GunData[, c("violent", "murder", "robbery")])
This analysis focuses on the distribution of crime rates and their relationship to income, density, and shall carry laws, using the following techniques:
# Keep only the columns needed for the analysis
cols <- c("year", "violent", "murder", "robbery", "density", "state", "law")
# Build the working data frame from those columns and preview the first rows
guns <- GunData[, cols, drop = FALSE]
head(guns)
## year violent murder robbery density state law
## 1 1977 414.4 14.2 96.8 0.0745524 Alabama no
## 2 1978 419.1 13.3 99.1 0.0755667 Alabama no
## 3 1979 413.3 13.2 109.5 0.0762453 Alabama no
## 4 1980 448.5 13.2 132.1 0.0768288 Alabama no
## 5 1981 470.5 11.9 126.5 0.0771866 Alabama no
## 6 1982 447.7 10.6 112.0 0.0773185 Alabama no
# Combine the three rate columns into a single crime measure.
# NOTE(review): if `violent` is an overall violent-crime rate it may already
# include murder and robbery, in which case this sum double-counts them --
# confirm against the data set's documentation.
guns$crime <- with(guns, violent + murder + robbery)
head(guns)
## year violent murder robbery density state law crime
## 1 1977 414.4 14.2 96.8 0.0745524 Alabama no 525.4
## 2 1978 419.1 13.3 99.1 0.0755667 Alabama no 531.5
## 3 1979 413.3 13.2 109.5 0.0762453 Alabama no 536.0
## 4 1980 448.5 13.2 132.1 0.0768288 Alabama no 593.8
## 5 1981 470.5 11.9 126.5 0.0771866 Alabama no 608.9
## 6 1982 447.7 10.6 112.0 0.0773185 Alabama no 570.3
# The individual rate columns to remove from the working data frame
drop <- c("violent", "murder", "robbery")
# Retain every column whose name is not listed in drop
guns <- guns[, setdiff(names(guns), drop)]
head(guns)
## year density state law crime
## 1 1977 0.0745524 Alabama no 525.4
## 2 1978 0.0755667 Alabama no 531.5
## 3 1979 0.0762453 Alabama no 536.0
## 4 1980 0.0768288 Alabama no 593.8
## 5 1981 0.0771866 Alabama no 608.9
## 6 1982 0.0773185 Alabama no 570.3
# Recode law as a numeric indicator: 1 where the value is "yes", 0 otherwise
guns$law <- as.numeric(guns$law == "yes")
head(guns)
## year density state law crime
## 1 1977 0.0745524 Alabama 0 525.4
## 2 1978 0.0755667 Alabama 0 531.5
## 3 1979 0.0762453 Alabama 0 536.0
## 4 1980 0.0768288 Alabama 0 593.8
## 5 1981 0.0771866 Alabama 0 608.9
## 6 1982 0.0773185 Alabama 0 570.3
# Reorder the columns by name instead of by position, so the result does not
# depend on the column order left by the earlier steps. The vector is also no
# longer called `order`, which masked base::order().
col_order <- c("state", "year", "crime", "density", "law")
guns <- guns[, col_order]
head(guns)
## state year crime density law
## 1 Alabama 1977 525.4 0.0745524 0
## 2 Alabama 1978 531.5 0.0755667 0
## 3 Alabama 1979 536.0 0.0762453 0
## 4 Alabama 1980 593.8 0.0768288 0
## 5 Alabama 1981 608.9 0.0771866 0
## 6 Alabama 1982 570.3 0.0773185 0
# Histogram of the combined crime rate, using bins 15 units wide
m <- ggplot(guns, aes(x = crime)) +
  geom_histogram(binwidth = 15)
# Add the title and axis labels, and centre the title horizontally
m +
  labs(title = "Distribution of Crime Rates", x = "Crime Rate", y = "Count") +
  theme(plot.title = element_text(hjust = 0.5))
Most of the crime rates are between 100 and 1,000 per 100k. The presence of outliers is consistent with the summary statistics and the boxplot.
# Yearly means of the crime rate and of the law indicator
crime_law <- aggregate(cbind(crime, law) ~ year, data = guns, FUN = mean)
# Line chart of the yearly mean crime rate, with a centred title
c1 <- ggplot(crime_law, aes(x = year, y = crime)) +
  geom_line()
c1 +
  labs(title = "Average Crime Rates", x = "Year", y = "per 100k") +
  theme(plot.title = element_text(hjust = 0.5))
Crimes have risen and fallen since 1977, rising in the late 70s/early 80s, then decreasing, before rising again in the late 80s, reaching a peak in the early 90s, and steadily decreasing since 1993.
# Line chart of the share of states with a law in effect, by year.
# crime_law$law is the yearly mean of a 0/1 indicator, i.e. a proportion
# between 0 and 1; it is scaled by 100 here so the plotted values match the
# "Percent" / "Percentage" wording of the title and the axis label.
c2 <- ggplot(crime_law, aes(x = year, y = law * 100)) +
  geom_line()
title <- ggtitle("Percent of States with Gun Laws")
center <- theme(plot.title = element_text(hjust = 0.5))
xl <- xlab("Year")
yl <- ylab("Percentage")
c2 + title + xl + yl + center
Fewer than 10% of states had shall carry laws in 1977; the share has increased steadily since 1985 and dramatically since 1994.
# Mean crime rate, rounded to a whole number, for each value of the law
# indicator.
crimelaw2 <- aggregate(crime ~ law, guns, FUN = function(x) round(mean(x)))
# Label the 0/1 indicator in one vectorized step. Chained gsub() calls with
# numeric patterns rely on implicit coercion and on substring replacement,
# which would corrupt any value other than exactly "0" or "1".
crimelaw2$law <- ifelse(crimelaw2$law == 1, "Yes", "No")
# Bar chart with the rounded mean printed in white inside each bar.
# geom_col() plots the y values as given; no dodging is needed because there
# is a single bar per x value.
p <- ggplot(crimelaw2, aes(x = law, y = crime)) +
  geom_col(fill = "steelblue") +
  theme_minimal() +
  geom_text(aes(label = crime), vjust = 1.6, color = "white", size = 3.5)
labels <- labs(
  title = "Average Crime Rates With/Without Shall Carry Laws",
  x = "Law?",
  y = "Avg Crime Rate"
)
center <- theme(plot.title = element_text(hjust = 0.5))
p + labels + center
States without shall carry laws on average experience 41% higher crime rates than states with laws.
# Scatter plot of the crime rate against population density
g1 <- ggplot(guns, aes(x = density, y = crime)) +
  geom_point()
# Add the title and axis labels, and centre the title horizontally
g1 +
  labs(
    title = "Crime Rates by Density",
    x = "Population per Sq Mile (x 1000)",
    y = "Crime Rate per 100k"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
The highest crime rates all occur in the most densely populated areas. Do these areas have shall carry laws?
# Work on just the crime, density, and law columns
cols <- c("crime", "density", "law")
dcl <- guns[cols]
# Label the 0/1 indicator in one vectorized step. Two chained gsub() calls
# only worked because the first replacement text ("No Law") happens to contain
# no "1"; a single ifelse() does not depend on that coincidence.
dcl$law <- ifelse(dcl$law == 1, "Law", "No Law")
# Scatter plot of crime rate against density, coloured by law status;
# semi-transparent points keep overlapping observations visible
q <- ggplot(dcl, aes(x = density, y = crime, color = law)) +
  geom_point(size = 3, alpha = 0.6) +
  labs(
    title = "Crime Rate by Population Density and Shall Carry Laws",
    x = "Population per Sq Mile (x 1000)",
    y = "Crime Rate per 100k"
  )
q
The conclusions of this analysis are:
Crime rates fluctuated between 1977 and 1993, peaked in 1993, and decreased steadily until 1999.
In 1977, fewer than 10% of states had shall carry laws in effect. In 1993, fewer than 35% of states had passed shall carry laws. By 1999, the figure was nearly 60%.
States with active shall carry laws on average had crime rates 41% lower than states without laws.
The states with the highest population density had the highest crime rates. None of those states had shall carry laws.