## reads csv from link to github repository, adds packages
library(readr)
## location of the raw CSV file in the GitHub repository
seatbeltsurl <- "https://raw.githubusercontent.com/mollysiebecker/R-Bridge-Course/main/USSeatBelts.csv"
## opens a connection to that URL and parses the CSV into a data frame
seatbelts <- read.csv(
  file = url(description = seatbeltsurl)
)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
Problem Statement: Do mandatory seatbelt laws lead to a decrease in traffic fatalities?
## cleans the data by removing the rows with missing (NA) values for
## self-reported seatbelt usage. is.na() tests for missingness directly;
## comparing the column against the string 'NA' only dropped these rows as a
## side effect of filter() discarding rows whose condition evaluates to NA.
seatbelts_clean <- filter(seatbelts, !is.na(seatbelt))
## prints summary statistics for every column of the cleaned data
summary(seatbelts_clean)
## X state year miles
## Min. : 8.0 Length:556 Min. :1983 Min. : 3316
## 1st Qu.:201.8 Class :character 1st Qu.:1989 1st Qu.: 13004
## Median :373.5 Mode :character Median :1992 Median : 35143
## Mean :378.5 Mean :1992 Mean : 46134
## 3rd Qu.:548.2 3rd Qu.:1995 3rd Qu.: 59968
## Max. :765.0 Max. :1997 Max. :285612
## fatalities seatbelt speed65 speed70
## Min. :0.008327 Min. :0.0600 Length:556 Length:556
## 1st Qu.:0.016217 1st Qu.:0.4200 Class :character Class :character
## Median :0.019215 Median :0.5500 Mode :character Mode :character
## Mean :0.019769 Mean :0.5289
## 3rd Qu.:0.022647 3rd Qu.:0.6500
## Max. :0.035649 Max. :0.8700
## drinkage alcohol income age
## Length:556 Length:556 Min. : 9696 Min. :29.59
## Class :character Class :character 1st Qu.:16476 1st Qu.:34.73
## Mode :character Mode :character Median :19202 Median :35.59
## Mean :19572 Mean :35.47
## 3rd Qu.:22321 3rd Qu.:36.27
## Max. :35863 Max. :39.17
## enforce
## Length:556
## Class :character
## Mode :character
##
##
##
The data shows the number of traffic fatalities per million miles driven in a given state in a given calendar year (the median being 0.019 fatalities for every million miles). Using the rate of traffic fatalities instead of the number of fatalities accounts for the fact that the number of miles driven varies enormously, from about 3,300 million to about 285,600 million miles. The self-reported rate of seatbelt usage is also included, again ranging widely from 6% to 87%, with a median of 55%. Demographic data includes the median income in the state (median of $19,202) and mean age (median of ~36). Categorical variables include whether the speed limit is 65 mph, whether the speed limit is 70 mph or greater, whether the drinking age is 21, whether the maximum BAC is 0.08, and the level of mandatory seatbelt enforcement.
## renames columns for greater clarity, adds a column with the number of
## fatalities (million miles driven times fatalities per million miles), and
## keeps only the columns this analysis focuses on
seatbelts_clean <- seatbelts_clean %>%
  rename(
    million_miles_driven = miles,
    fatalities_per_million_miles = fatalities,
    seatbelt_usage_rate = seatbelt,
    seatbelt_enforcement = enforce
  ) %>%
  mutate(
    number_fatalities = million_miles_driven * fatalities_per_million_miles
  ) %>%
  select(
    state,
    year,
    million_miles_driven,
    fatalities_per_million_miles,
    number_fatalities,
    seatbelt_usage_rate,
    seatbelt_enforcement
  )
## previews the first rows of the result
head(seatbelts_clean)
## each subset below keeps the rows for one enforcement level and drops the
## (now constant) seatbelt_enforcement column by selecting the other columns

## creates subset in which seatbelt enforcement was primary
seatbelts_enforced_primary <- seatbelts_clean %>%
  filter(seatbelt_enforcement == "primary") %>%
  select(
    state, year, million_miles_driven, fatalities_per_million_miles,
    number_fatalities, seatbelt_usage_rate
  )
## creates subset in which seatbelt enforcement was secondary
seatbelts_enforced_secondary <- seatbelts_clean %>%
  filter(seatbelt_enforcement == "secondary") %>%
  select(
    state, year, million_miles_driven, fatalities_per_million_miles,
    number_fatalities, seatbelt_usage_rate
  )
## creates subset in which seatbelt usage was not enforced
seatbelts_not_enforced <- seatbelts_clean %>%
  filter(seatbelt_enforcement == "no") %>%
  select(
    state, year, million_miles_driven, fatalities_per_million_miles,
    number_fatalities, seatbelt_usage_rate
  )
Additional Data Exploration of Subsets:
## median fatality rate for each enforcement subset, in the order:
## primary enforcement, secondary enforcement, no enforcement
with(seatbelts_enforced_primary, median(fatalities_per_million_miles))
## [1] 0.0187302
with(seatbelts_enforced_secondary, median(fatalities_per_million_miles))
## [1] 0.01888598
with(seatbelts_not_enforced, median(fatalities_per_million_miles))
## [1] 0.02175247
## mean fatality rate for each enforcement subset, in the same order
with(seatbelts_enforced_primary, mean(fatalities_per_million_miles))
## [1] 0.0192409
with(seatbelts_enforced_secondary, mean(fatalities_per_million_miles))
## [1] 0.01927277
with(seatbelts_not_enforced, mean(fatalities_per_million_miles))
## [1] 0.02159116
For both the mean and the median, the fatality rate is higher where seatbelt laws are not enforced, while the rates under primary and secondary enforcement are similar to each other.
## median seatbelt usage rate for each enforcement subset, in the order:
## primary enforcement, secondary enforcement, no enforcement
with(seatbelts_enforced_primary, median(seatbelt_usage_rate))
## [1] 0.72
with(seatbelts_enforced_secondary, median(seatbelt_usage_rate))
## [1] 0.58
with(seatbelts_not_enforced, median(seatbelt_usage_rate))
## [1] 0.3015
## mean seatbelt usage rate for each enforcement subset, in the same order
with(seatbelts_enforced_primary, mean(seatbelt_usage_rate))
## [1] 0.7021605
with(seatbelts_enforced_secondary, mean(seatbelt_usage_rate))
## [1] 0.5627668
with(seatbelts_not_enforced, mean(seatbelt_usage_rate))
## [1] 0.3115367
For both the mean and median seatbelt usage rate, states that enforce mandatory seatbelts as a primary violation report the highest rates of seatbelt usage, followed by states that enforce mandatory seatbelts as a secondary violation, and then those that do not enforce mandatory seatbelt laws.
## draws one histogram each for seatbelt usage rate, fatality rate, and number
## of fatalities; the main title is left blank on all three
hist(
  x = seatbelts_clean[["seatbelt_usage_rate"]],
  main = "",
  xlab = "Seatbelt Usage Rate",
  ylab = "Frequency"
)
hist(
  x = seatbelts_clean[["fatalities_per_million_miles"]],
  main = "",
  xlab = "Fatalities Per Million Miles",
  ylab = "Frequency"
)
hist(
  x = seatbelts_clean[["number_fatalities"]],
  main = "",
  xlab = "Number of Fatalities",
  ylab = "Frequency"
)
## creates boxplot that compares the fatality rates for the three seatbelt
## enforcement levels; `names` labels each box so the groups are identified on
## the axis itself rather than only by position
boxplot(
  seatbelts_enforced_primary$fatalities_per_million_miles,
  seatbelts_enforced_secondary$fatalities_per_million_miles,
  seatbelts_not_enforced$fatalities_per_million_miles,
  names = c("Primary", "Secondary", "None"),
  xlab = "Enforcement Level: Primary, Secondary, None",
  ylab = "Fatality Rate",
  main = "Traffic Fatalities by Seatbelt Enforcement Level"
)
## creates boxplot that compares the seatbelt usage rates for the three
## seatbelt enforcement levels, with the same per-box labels
boxplot(
  seatbelts_enforced_primary$seatbelt_usage_rate,
  seatbelts_enforced_secondary$seatbelt_usage_rate,
  seatbelts_not_enforced$seatbelt_usage_rate,
  names = c("Primary", "Secondary", "None"),
  xlab = "Enforcement Level: Primary, Secondary, None",
  ylab = "Seatbelt Usage Rate",
  main = "Seatbelt Usage by Enforcement Level"
)
## creates scatterplot of seatbelt usage rate and fatality rate
## (built with ggplot() + geom_point() because qplot() was deprecated in
## ggplot2 3.4.0 and emits a deprecation warning)
seatbelt_scatter <- ggplot(
  seatbelts_clean,
  aes(x = seatbelt_usage_rate, y = fatalities_per_million_miles)
) +
  geom_point()
print(seatbelt_scatter + ggtitle("Seat Belt Usage and Traffic Fatalities"))
## finds correlation coefficient between seatbelt usage and fatality rate
cor(seatbelts_clean$seatbelt_usage_rate, seatbelts_clean$fatalities_per_million_miles)
## [1] -0.4027138
## creates scatterplots of seatbelt usage rate and fatality rate, for three
## different enforcement levels (built with ggplot() + geom_point() because
## qplot() was deprecated in ggplot2 3.4.0)
primary_scatter <- ggplot(
  seatbelts_enforced_primary,
  aes(x = seatbelt_usage_rate, y = fatalities_per_million_miles)
) +
  geom_point()
print(primary_scatter + ggtitle("Primary Enforcement: Seat Belt Usage and Traffic Fatalities"))
secondary_scatter <- ggplot(
  seatbelts_enforced_secondary,
  aes(x = seatbelt_usage_rate, y = fatalities_per_million_miles)
) +
  geom_point()
print(secondary_scatter + ggtitle("Secondary Enforcement: Seat Belt Usage and Traffic Fatalities"))
no_enforcement_scatter <- ggplot(
  seatbelts_not_enforced,
  aes(x = seatbelt_usage_rate, y = fatalities_per_million_miles)
) +
  geom_point()
print(no_enforcement_scatter + ggtitle("No Enforcement: Seat Belt Usage and Traffic Fatalities"))
## correlation coefficients for scatterplots broken down by enforcement level
cor(seatbelts_enforced_primary$seatbelt_usage_rate, seatbelts_enforced_primary$fatalities_per_million_miles)
## [1] -0.3654832
cor(seatbelts_enforced_secondary$seatbelt_usage_rate, seatbelts_enforced_secondary$fatalities_per_million_miles)
## [1] -0.3906899
cor(seatbelts_not_enforced$seatbelt_usage_rate, seatbelts_not_enforced$fatalities_per_million_miles)
## [1] -0.4680482
Higher rates of seatbelt usage have a moderate negative correlation with the rate of traffic fatalities. States that do not enforce mandatory seatbelt laws have a higher rate of traffic fatalities, whereas the rates in states with primary enforcement and in states with secondary enforcement are similar. Though the rate of fatalities does not differ greatly between states with primary enforcement and those with secondary enforcement, the reported seatbelt usage rate is substantially greater in states with primary enforcement than in those with secondary enforcement. Why the increase in reported seatbelt usage does not correspond to a commensurate decrease in traffic fatalities between states with primary and secondary enforcement is unclear. Further analysis could attempt to examine measures of seatbelt usage that do not rely on self-reporting, examine the impact of attitudes towards wearing seatbelts, or examine the impact of the other measures included here related to the speed limit, alcohol laws, and demographic information.