## reads csv from link to github repository, adds packages

library(readr)
## location of the raw USSeatBelts csv in the course GitHub repository
seatbeltsurl <- "https://raw.githubusercontent.com/mollysiebecker/R-Bridge-Course/main/USSeatBelts.csv"
## read.csv() accepts a complete URL as its file argument, so it is passed directly
seatbelts <- read.csv(seatbeltsurl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

Problem Statement: Do mandatory seatbelt laws lead to a decrease in traffic fatalities?

  1. Initial Data Exploration
## cleans the data by removing the rows with missing (NA) values for self-reported seatbelt usage.
## is.na() tests for missingness explicitly; comparing against the string 'NA' only
## dropped these rows as a side effect of NA propagating through the comparison.
seatbelts_clean <- filter(seatbelts, !is.na(seatbelt))

summary(seatbelts_clean)
##        X            state                year          miles       
##  Min.   :  8.0   Length:556         Min.   :1983   Min.   :  3316  
##  1st Qu.:201.8   Class :character   1st Qu.:1989   1st Qu.: 13004  
##  Median :373.5   Mode  :character   Median :1992   Median : 35143  
##  Mean   :378.5                      Mean   :1992   Mean   : 46134  
##  3rd Qu.:548.2                      3rd Qu.:1995   3rd Qu.: 59968  
##  Max.   :765.0                      Max.   :1997   Max.   :285612  
##    fatalities          seatbelt        speed65            speed70         
##  Min.   :0.008327   Min.   :0.0600   Length:556         Length:556        
##  1st Qu.:0.016217   1st Qu.:0.4200   Class :character   Class :character  
##  Median :0.019215   Median :0.5500   Mode  :character   Mode  :character  
##  Mean   :0.019769   Mean   :0.5289                                        
##  3rd Qu.:0.022647   3rd Qu.:0.6500                                        
##  Max.   :0.035649   Max.   :0.8700                                        
##    drinkage           alcohol              income           age       
##  Length:556         Length:556         Min.   : 9696   Min.   :29.59  
##  Class :character   Class :character   1st Qu.:16476   1st Qu.:34.73  
##  Mode  :character   Mode  :character   Median :19202   Median :35.59  
##                                        Mean   :19572   Mean   :35.47  
##                                        3rd Qu.:22321   3rd Qu.:36.27  
##                                        Max.   :35863   Max.   :39.17  
##    enforce         
##  Length:556        
##  Class :character  
##  Mode  :character  
##                    
##                    
## 

The data show the number of traffic fatalities per million miles driven in a given state in a given calendar year (the median is 0.019 fatalities per million miles). Using the fatality rate rather than the raw number of fatalities accounts for the wide variation in miles driven, which ranges from about 3,300 to about 285,600 million miles. The self-reported rate of seatbelt usage is also included and likewise varies widely, from 6% to 87%, with a median of 55%. Demographic data include the median income in the state (median of $19,202) and the mean age (median of about 36). Categorical variables include whether the speed limit is 65 mph, whether the speed limit is 70 mph or greater, whether the drinking age is 21, whether the maximum BAC is 0.08, and the level of mandatory seatbelt enforcement.

  1. Data wrangling
## reshapes the cleaned data in a single pipeline:
##   1. renames columns for greater clarity
##   2. adds a column with the number of fatalities (miles driven times fatality rate)
##   3. keeps only the columns used in the rest of the analysis
seatbelts_clean <- seatbelts_clean %>%
  rename(
    million_miles_driven = miles,
    fatalities_per_million_miles = fatalities,
    seatbelt_usage_rate = seatbelt,
    seatbelt_enforcement = enforce
  ) %>%
  mutate(
    number_fatalities = million_miles_driven * fatalities_per_million_miles
  ) %>%
  select(
    state,
    year,
    million_miles_driven,
    fatalities_per_million_miles,
    number_fatalities,
    seatbelt_usage_rate,
    seatbelt_enforcement
  )

## shows the first rows of the reshaped data
head(seatbelts_clean)
## each subset keeps the rows for one enforcement level and drops the
## (now constant) seatbelt_enforcement column by selecting the remaining columns

## creates subset in which seatbelt enforcement was primary
seatbelts_enforced_primary <- seatbelts_clean %>%
  filter(seatbelt_enforcement == "primary") %>%
  select(
    state, year, million_miles_driven, fatalities_per_million_miles,
    number_fatalities, seatbelt_usage_rate
  )

## creates subset in which seatbelt enforcement was secondary
seatbelts_enforced_secondary <- seatbelts_clean %>%
  filter(seatbelt_enforcement == "secondary") %>%
  select(
    state, year, million_miles_driven, fatalities_per_million_miles,
    number_fatalities, seatbelt_usage_rate
  )

## creates subset in which seatbelt usage was not enforced
seatbelts_not_enforced <- seatbelts_clean %>%
  filter(seatbelt_enforcement == "no") %>%
  select(
    state, year, million_miles_driven, fatalities_per_million_miles,
    number_fatalities, seatbelt_usage_rate
  )

Additional Data Exploration of Subsets:

## median fatality rate (primary enforcement, secondary enforcement, no enforcement)
seatbelts_enforced_primary %>%
  pull(fatalities_per_million_miles) %>%
  median()
## [1] 0.0187302
seatbelts_enforced_secondary %>%
  pull(fatalities_per_million_miles) %>%
  median()
## [1] 0.01888598
seatbelts_not_enforced %>%
  pull(fatalities_per_million_miles) %>%
  median()
## [1] 0.02175247
## mean fatality rate (primary enforcement, secondary enforcement, no enforcement)
seatbelts_enforced_primary %>%
  pull(fatalities_per_million_miles) %>%
  mean()
## [1] 0.0192409
seatbelts_enforced_secondary %>%
  pull(fatalities_per_million_miles) %>%
  mean()
## [1] 0.01927277
seatbelts_not_enforced %>%
  pull(fatalities_per_million_miles) %>%
  mean()
## [1] 0.02159116

For both the mean and the median, the fatality rate is higher where seatbelt laws are not enforced, while the rates under primary and secondary enforcement are similar to each other.

## median seatbelt usage rate (primary enforcement, secondary enforcement, no enforcement)
seatbelts_enforced_primary %>%
  pull(seatbelt_usage_rate) %>%
  median()
## [1] 0.72
seatbelts_enforced_secondary %>%
  pull(seatbelt_usage_rate) %>%
  median()
## [1] 0.58
seatbelts_not_enforced %>%
  pull(seatbelt_usage_rate) %>%
  median()
## [1] 0.3015
## mean seatbelt usage rate (primary enforcement, secondary enforcement, no enforcement)
seatbelts_enforced_primary %>%
  pull(seatbelt_usage_rate) %>%
  mean()
## [1] 0.7021605
seatbelts_enforced_secondary %>%
  pull(seatbelt_usage_rate) %>%
  mean()
## [1] 0.5627668
seatbelts_not_enforced %>%
  pull(seatbelt_usage_rate) %>%
  mean()
## [1] 0.3115367

For both the mean and median seatbelt usage rate, states that enforce mandatory seatbelts as a primary violation report the highest rates of seatbelt usage, followed by states that enforce mandatory seatbelts as a secondary violation, and then those that do not enforce mandatory seatbelt laws.

  1. Graphics
## draws three untitled histograms from the cleaned data:
## seatbelt usage rate, fatality rate, and number of fatalities
hist(
  seatbelts_clean[["seatbelt_usage_rate"]],
  main = "",
  xlab = "Seatbelt Usage Rate",
  ylab = "Frequency"
)

hist(
  seatbelts_clean[["fatalities_per_million_miles"]],
  main = "",
  xlab = "Fatalities Per Million Miles",
  ylab = "Frequency"
)

hist(
  seatbelts_clean[["number_fatalities"]],
  main = "",
  xlab = "Number of Fatalities",
  ylab = "Frequency"
)

## creates boxplot that compares the fatality rates for the three seatbelt enforcement levels.
## names = labels each box with its enforcement level; without it the boxes are
## labelled 1, 2, 3 and the reader has to decode the order from the axis title.
boxplot(
  seatbelts_enforced_primary$fatalities_per_million_miles,
  seatbelts_enforced_secondary$fatalities_per_million_miles,
  seatbelts_not_enforced$fatalities_per_million_miles,
  names = c("Primary", "Secondary", "None"),
  xlab = "Enforcement Level",
  ylab = "Fatality Rate",
  main = "Traffic Fatalities by Seatbelt Enforcement Level"
)

## creates boxplot that compares the seatbelt usage rates for the three seatbelt enforcement levels
## (boxes labelled the same way as above)
boxplot(
  seatbelts_enforced_primary$seatbelt_usage_rate,
  seatbelts_enforced_secondary$seatbelt_usage_rate,
  seatbelts_not_enforced$seatbelt_usage_rate,
  names = c("Primary", "Secondary", "None"),
  xlab = "Enforcement Level",
  ylab = "Seatbelt Usage Rate",
  main = "Seatbelt Usage by Enforcement Level"
)

## creates scatterplot of seatbelt usage rate and fatality rate.
## built with ggplot() + geom_point() because qplot() was deprecated in ggplot2 3.4.0
seatbelt_scatter <- ggplot(
  seatbelts_clean,
  aes(x = seatbelt_usage_rate, y = fatalities_per_million_miles)
) +
  geom_point()
print(seatbelt_scatter + ggtitle("Seat Belt Usage and Traffic Fatalities"))

## finds correlation coefficient between seatbelt usage and fatality rate
cor(seatbelts_clean$seatbelt_usage_rate, seatbelts_clean$fatalities_per_million_miles)
## [1] -0.4027138
## creates scatterplots of seatbelt usage rate and fatality rate, for three different enforcement levels.
## each plot uses ggplot() + geom_point() because qplot() was deprecated in ggplot2 3.4.0

primary_scatter <- ggplot(
  seatbelts_enforced_primary,
  aes(x = seatbelt_usage_rate, y = fatalities_per_million_miles)
) +
  geom_point()
print(primary_scatter + ggtitle("Primary Enforcement: Seat Belt Usage and Traffic Fatalities"))

secondary_scatter <- ggplot(
  seatbelts_enforced_secondary,
  aes(x = seatbelt_usage_rate, y = fatalities_per_million_miles)
) +
  geom_point()
print(secondary_scatter + ggtitle("Secondary Enforcement: Seat Belt Usage and Traffic Fatalities"))

no_enforcement_scatter <- ggplot(
  seatbelts_not_enforced,
  aes(x = seatbelt_usage_rate, y = fatalities_per_million_miles)
) +
  geom_point()
print(no_enforcement_scatter + ggtitle("No Enforcement: Seat Belt Usage and Traffic Fatalities"))

## correlation between seatbelt usage rate and fatality rate within each
## enforcement-level subset (primary, secondary, none)
with(seatbelts_enforced_primary, cor(seatbelt_usage_rate, fatalities_per_million_miles))
## [1] -0.3654832
with(seatbelts_enforced_secondary, cor(seatbelt_usage_rate, fatalities_per_million_miles))
## [1] -0.3906899
with(seatbelts_not_enforced, cor(seatbelt_usage_rate, fatalities_per_million_miles))
## [1] -0.4680482
  1. Conclusion

Higher rates of seatbelt usage have a moderate negative correlation with the rate of traffic fatalities. States that do not enforce mandatory seatbelt laws have a higher rate of traffic fatalities than states that do, while the rates in states with primary enforcement and in states with secondary enforcement are similar to each other. Although the fatality rate differs little between primary and secondary enforcement, the reported seatbelt usage rate is substantially higher in states with primary enforcement than in those with secondary enforcement. Why the higher reported seatbelt usage does not correspond to a commensurate decrease in traffic fatalities between states with primary and secondary enforcement is unclear. Further analysis could examine measures of seatbelt usage that do not rely on self-reporting, the impact of attitudes towards wearing seatbelts, or the impact of the other measures included here related to the speed limit, alcohol laws, and demographic information.