Effects of Mandatory Seat Belt Laws in the US

This project details how seat belt enforcement has impacted the road fatalities during the period 1983-1997 across different states. In this project, we analyze the traffic data collected during the period and relate the impact of seat belt usage on the fatalities

library("curl")

USSeatBelts <- read.csv(curl("https://raw.githubusercontent.com/rathish-ps/RLearning/main/data/USSeatBelts.csv"))
head(USSeatBelts,10)
##     X state year miles fatalities seatbelt speed65 speed70 drinkage alcohol
## 1   1    AK 1983  3358 0.04466945       NA      no      no      yes      no
## 2   2    AK 1984  3589 0.03733630       NA      no      no      yes      no
## 3   3    AK 1985  3840 0.03307291       NA      no      no      yes      no
## 4   4    AK 1986  4008 0.02519960       NA      no      no      yes      no
## 5   5    AK 1987  3900 0.01948718       NA      no      no      yes      no
## 6   6    AK 1988  3841 0.02525384       NA      no      no      yes      no
## 7   7    AK 1989  3887 0.02161050       NA      no      no      yes      no
## 8   8    AK 1990  3979 0.02462930     0.45      no      no      yes      no
## 9   9    AK 1991  4021 0.02511813     0.66      no      no      yes      no
## 10 10    AK 1992  3841 0.02811768     0.66     yes      no      yes      no
##    income      age   enforce
## 1   17973 28.23497        no
## 2   18093 28.34354        no
## 3   18925 28.37282        no
## 4   18466 28.39665        no
## 5   18021 28.45325        no
## 6   18447 28.85142        no
## 7   19970 29.14895        no
## 8   21073 29.58628        no
## 9   21496 29.82771 secondary
## 10  22073 30.21070 secondary
# The mean value has reduced when seat belt

USSeatBelts[is.na(USSeatBelts)] = 0
meanValueWithoutSeatBelt <- mean(USSeatBelts$fatalities[USSeatBelts$seatbelt ==0])
meanValueWithSeatBelt <- mean(USSeatBelts$fatalities[USSeatBelts$seatbelt > 0])


medianValueWithoutSeatBelt <- median(USSeatBelts$fatalities[USSeatBelts$seatbelt ==0])
medianValueWithSeatBelt <- median(USSeatBelts$fatalities[USSeatBelts$seatbelt > 0])

summary(USSeatBelts)
##        X          state                year          miles       
##  Min.   :  1   Length:765         Min.   :1983   Min.   :  3099  
##  1st Qu.:192   Class :character   1st Qu.:1986   1st Qu.: 11401  
##  Median :383   Mode  :character   Median :1990   Median : 30319  
##  Mean   :383                      Mean   :1990   Mean   : 41448  
##  3rd Qu.:574                      3rd Qu.:1994   3rd Qu.: 52312  
##  Max.   :765                      Max.   :1997   Max.   :285612  
##    fatalities          seatbelt        speed65            speed70         
##  Min.   :0.008327   Min.   :0.0000   Length:765         Length:765        
##  1st Qu.:0.017341   1st Qu.:0.0000   Class :character   Class :character  
##  Median :0.021199   Median :0.4660   Mode  :character   Mode  :character  
##  Mean   :0.021490   Mean   :0.3844                                        
##  3rd Qu.:0.024774   3rd Qu.:0.6200                                        
##  Max.   :0.045470   Max.   :0.8700                                        
##    drinkage           alcohol              income           age       
##  Length:765         Length:765         Min.   : 8372   Min.   :28.23  
##  Class :character   Class :character   1st Qu.:14266   1st Qu.:34.39  
##  Mode  :character   Mode  :character   Median :17624   Median :35.39  
##                                        Mean   :17993   Mean   :35.14  
##                                        3rd Qu.:21080   3rd Qu.:36.13  
##                                        Max.   :35863   Max.   :39.17  
##    enforce         
##  Length:765        
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
print(paste("Mean of Total Fatalities without SeatBelt  ",meanValueWithoutSeatBelt," and Mean of Total Fatalities with seatbelt is  ",meanValueWithSeatBelt))
## [1] "Mean of Total Fatalities without SeatBelt   0.0260679029311504  and Mean of Total Fatalities with seatbelt is   0.0197685019618986"
print(paste("Median of Total Fatalities without SeatBelt  ",medianValueWithoutSeatBelt," and Median of Total Fatalities with seatbelt is  ",medianValueWithSeatBelt))
## [1] "Median of Total Fatalities without SeatBelt   0.025036681443453  and Median of Total Fatalities with seatbelt is   0.0192152736708525"

Data transformation

Make a subset of data by row and column filtering Expand the state name , filter and replace values Replace N/A by ZERO value

USSeatBeltsSubset <-subset(USSeatBelts,state == 'AK'| state=='AL'| state=='CA',select = c(state,year,miles,fatalities,seatbelt))

USSeatBeltsSubset[is.na(USSeatBeltsSubset)] = 0

USSeatBeltsSubset$state[USSeatBeltsSubset$state == 'AL'] <- 'Alabama' 
USSeatBeltsSubset$state[USSeatBeltsSubset$state == 'AK'] <- 'Alaska'  
USSeatBeltsSubset$state[USSeatBeltsSubset$state == 'CA'] <- 'California' 


USSeatBeltsSubsetAk <-subset(USSeatBeltsSubset,state == 'Alaska',select = c(state,year,miles,fatalities,seatbelt))
USSeatBeltsSubsetAl <-subset(USSeatBeltsSubset,state == 'Alabama',select = c(state,year,miles,fatalities,seatbelt))
USSeatBeltsSubsetCa <-subset(USSeatBeltsSubset,state == 'California',select = c(state,year,miles,fatalities,seatbelt))

Scatter plot

Plot a scatter chart grouping the data and displaying the traffic fatalities during the time period for 3 different states Add a regression line to the scatter plot

library(ggplot2)

ggplot(USSeatBeltsSubset, aes(x=year, y=fatalities,colour=state,shape=state)) + geom_point(size=2) +labs(title='Road Fatalities',x='Period',y='Fatalities')+geom_smooth(method=lm, se=FALSE, fullrange=TRUE)
## `geom_smooth()` using formula 'y ~ x'

Using secondary Y Axis

p <- ggplot(USSeatBeltsSubsetAl, aes(x = year))
p <- p + geom_line(aes(y = seatbelt, colour = "SeatBelt"))
p <- p + geom_line(aes(y = fatalities, colour = "fatalities"))
p <- p + scale_y_continuous(sec.axis = sec_axis(~.*.05,name = "fatalities"))
p <- p + scale_colour_manual(values = c("blue", "red"))
p <- p + labs(y = "SeatBelt", x = "Period", colour = "Parameter")
p

Box plot example

ggplot(USSeatBeltsSubset, aes(x=state, y=fatalities,group=state)) +   geom_boxplot(outlier.colour="red", outlier.shape=8,outlier.size=4) +labs(title='Road Fatalities',x='State',y='Fatalities')

Line plot

Plot a line chart showing fatality rate vs seat belt usage This graph shows the fatalities rates are reduced when the seat belt usages are higher

 ggplot(USSeatBeltsSubset, aes(x=seatbelt, y=fatalities, group=state, color=state)) +  geom_line()+labs(title='Road Fatalities',x='Seatbelt Usage',y='Fatalities')

Reading from git

USSeatBeltsBkpData <- read.csv(curl("https://raw.githubusercontent.com/rathish-ps/RLearning/main/data/USSeatBelts.csv"))
#head(USSeatBeltsBkpData,5)

Conclusion

We noticed that mean and median values of fatality rates were came down after different states started mandating seat belt usage. The same has been visible across different graph. From the analysis it is clear that over the period traffic fatality rates were reduced as more states mandated seat belt usage