###INTRODUCTION
###DATA BACKGROUND
###PACKAGES
# Load the tidyverse, ggplot2, dplyr, and infer libraries.
library(tidyverse)
#> ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
#> ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
#> ✔ tibble 3.1.8 ✔ dplyr 1.0.9
#> ✔ tidyr 1.2.0 ✔ stringr 1.4.1
#> ✔ readr 2.1.2 ✔ forcats 0.5.2
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
library(ggplot2)
library(dplyr)
library(infer)
###ANALYSIS
###SECTION 1:LOAD THE DATA
# Load the bad-drivers dataset published by FiveThirtyEight on GitHub.
# The CSV is read straight from the raw GitHub URL with read.csv(); the first
# row holds the column names and fields are comma-separated.
theUrl <- "https://raw.githubusercontent.com/fivethirtyeight/data/master/bad-drivers/bad-drivers.csv"
worst_drivers <- read.csv(file = theUrl, header = TRUE, sep = ",")
# Preview the first rows to confirm the download and parse succeeded.
head(worst_drivers)
#> State Number.of.drivers.involved.in.fatal.collisions.per.billion.miles
#> 1 Alabama 18.8
#> 2 Alaska 18.1
#> 3 Arizona 18.6
#> 4 Arkansas 22.4
#> 5 California 12.0
#> 6 Colorado 13.6
#> Percentage.Of.Drivers.Involved.In.Fatal.Collisions.Who.Were.Speeding
#> 1 39
#> 2 41
#> 3 35
#> 4 18
#> 5 35
#> 6 37
#> Percentage.Of.Drivers.Involved.In.Fatal.Collisions.Who.Were.Alcohol.Impaired
#> 1 30
#> 2 25
#> 3 28
#> 4 26
#> 5 28
#> 6 28
#> Percentage.Of.Drivers.Involved.In.Fatal.Collisions.Who.Were.Not.Distracted
#> 1 96
#> 2 90
#> 3 84
#> 4 94
#> 5 91
#> 6 79
#> Percentage.Of.Drivers.Involved.In.Fatal.Collisions.Who.Had.Not.Been.Involved.In.Any.Previous.Accidents
#> 1 80
#> 2 94
#> 3 96
#> 4 95
#> 5 89
#> 6 95
#> Car.Insurance.Premiums....
#> 1 784.55
#> 2 1053.48
#> 3 899.47
#> 4 827.34
#> 5 878.41
#> 6 835.50
#> Losses.incurred.by.insurance.companies.for.collisions.per.insured.driver....
#> 1 145.08
#> 2 133.93
#> 3 110.35
#> 4 142.39
#> 5 165.63
#> 6 139.91
###SECTION 2: EXPLORATORY DATA ANALYSIS/CHECKING ASSUMPTIONS
###SECTION 3: SUMMARY STATISTICS
# Inspect the raw data: number of rows and columns, plus each column's name
# and type.
worst_drivers %>% glimpse()
#> Rows: 51
#> Columns: 8
#> $ State <chr> …
#> $ Number.of.drivers.involved.in.fatal.collisions.per.billion.miles <dbl> …
#> $ Percentage.Of.Drivers.Involved.In.Fatal.Collisions.Who.Were.Speeding <int> …
#> $ Percentage.Of.Drivers.Involved.In.Fatal.Collisions.Who.Were.Alcohol.Impaired <int> …
#> $ Percentage.Of.Drivers.Involved.In.Fatal.Collisions.Who.Were.Not.Distracted <int> …
#> $ Percentage.Of.Drivers.Involved.In.Fatal.Collisions.Who.Had.Not.Been.Involved.In.Any.Previous.Accidents <int> …
#> $ Car.Insurance.Premiums.... <dbl> …
#> $ Losses.incurred.by.insurance.companies.for.collisions.per.insured.driver.... <dbl> …
# Replace the long original headers with short upper-case names, given in the
# same order as the eight columns of the data frame.
worst_drivers <- setNames(
  worst_drivers,
  c(
    "STATE",
    "DRIVERS_INVOLVED",
    "PERC_DRIVERS_SPEED",
    "PERC_DRIVERS_ALCHO",
    "PERC_DRIVERS_NOT_DIST",
    "PERC_DRIVERS_NO_ACC",
    "INS_PREM",
    "LOSS_INSCOMP"
  )
)
# Confirm the new column names.
worst_drivers %>% glimpse()
#> Rows: 51
#> Columns: 8
#> $ STATE <chr> "Alabama", "Alaska", "Arizona", "Arkansas", "Cal…
#> $ DRIVERS_INVOLVED <dbl> 18.8, 18.1, 18.6, 22.4, 12.0, 13.6, 10.8, 16.2, …
#> $ PERC_DRIVERS_SPEED <int> 39, 41, 35, 18, 35, 37, 46, 38, 34, 21, 19, 54, …
#> $ PERC_DRIVERS_ALCHO <int> 30, 25, 28, 26, 28, 28, 36, 30, 27, 29, 25, 41, …
#> $ PERC_DRIVERS_NOT_DIST <int> 96, 90, 84, 94, 91, 79, 87, 87, 100, 92, 95, 82,…
#> $ PERC_DRIVERS_NO_ACC <int> 80, 94, 96, 95, 89, 95, 82, 99, 100, 94, 93, 87,…
#> $ INS_PREM <dbl> 784.55, 1053.48, 899.47, 827.34, 878.41, 835.50,…
#> $ LOSS_INSCOMP <dbl> 145.08, 133.93, 110.35, 142.39, 165.63, 139.91, …
# Turn each percentage column into a rate on the same scale as
# DRIVERS_INVOLVED by computing (DRIVERS_INVOLVED * PERC_*) / 100:
#   DRIVERS_SPEED    from PERC_DRIVERS_SPEED
#   DRIVERS_ALCHO    from PERC_DRIVERS_ALCHO
#   DRIVERS_NOT_DIST from PERC_DRIVERS_NOT_DIST
#   DRIVERS_NO_ACC   from PERC_DRIVERS_NO_ACC
# The result is stored in a new data frame; worst_drivers is left unchanged.
percent_worst_drivers <- worst_drivers %>%
  mutate(
    DRIVERS_SPEED = (DRIVERS_INVOLVED * PERC_DRIVERS_SPEED) / 100,
    DRIVERS_ALCHO = (DRIVERS_INVOLVED * PERC_DRIVERS_ALCHO) / 100,
    DRIVERS_NOT_DIST = (DRIVERS_INVOLVED * PERC_DRIVERS_NOT_DIST) / 100,
    DRIVERS_NO_ACC = (DRIVERS_INVOLVED * PERC_DRIVERS_NO_ACC) / 100
  )
# Confirm the four derived columns were appended.
glimpse(percent_worst_drivers)
#> Rows: 51
#> Columns: 12
#> $ STATE <chr> "Alabama", "Alaska", "Arizona", "Arkansas", "Cal…
#> $ DRIVERS_INVOLVED <dbl> 18.8, 18.1, 18.6, 22.4, 12.0, 13.6, 10.8, 16.2, …
#> $ PERC_DRIVERS_SPEED <int> 39, 41, 35, 18, 35, 37, 46, 38, 34, 21, 19, 54, …
#> $ PERC_DRIVERS_ALCHO <int> 30, 25, 28, 26, 28, 28, 36, 30, 27, 29, 25, 41, …
#> $ PERC_DRIVERS_NOT_DIST <int> 96, 90, 84, 94, 91, 79, 87, 87, 100, 92, 95, 82,…
#> $ PERC_DRIVERS_NO_ACC <int> 80, 94, 96, 95, 89, 95, 82, 99, 100, 94, 93, 87,…
#> $ INS_PREM <dbl> 784.55, 1053.48, 899.47, 827.34, 878.41, 835.50,…
#> $ LOSS_INSCOMP <dbl> 145.08, 133.93, 110.35, 142.39, 165.63, 139.91, …
#> $ DRIVERS_SPEED <dbl> 7.332, 7.421, 6.510, 4.032, 4.200, 5.032, 4.968,…
#> $ DRIVERS_ALCHO <dbl> 5.640, 4.525, 5.208, 5.824, 3.360, 3.808, 3.888,…
#> $ DRIVERS_NOT_DIST <dbl> 18.048, 16.290, 15.624, 21.056, 10.920, 10.744, …
#> $ DRIVERS_NO_ACC <dbl> 15.040, 17.014, 17.856, 21.280, 10.680, 12.920, …
###SECTION 4: VISUALIZING THE RESULTS
# Stacked bar chart with one bar per state, filled by measure
# (DRIVERS_INVOLVED and DRIVERS_SPEED).
# NOTE(review): DRIVERS_SPEED is derived from DRIVERS_INVOLVED, so stacking the
# two makes each bar's total height their sum -- confirm this is the intended
# display.
percent_worst_drivers %>%
  select(STATE, DRIVERS_INVOLVED, DRIVERS_SPEED) %>%
  # Reshape to long form (one row per state and measure) so `type` can drive
  # the fill colour.
  pivot_longer(
    cols = c(DRIVERS_INVOLVED, DRIVERS_SPEED),
    names_to = "type",
    values_to = "value"
  ) %>%
  ggplot(aes(x = STATE, y = value, fill = type)) +
  geom_bar(position = "stack", stat = "identity") +
  scale_fill_manual(values = c("red", "darkred")) +
  ylab("Drivers involved in Fatal collision while Speeding") +
  # Rotate the state labels so all 51 fit along the x axis.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
###SESSION INFORMATION
# Set the printed output width to 100 characters, then record the R version,
# platform, and package versions used for this analysis.
options(width = 100)
devtools::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.2.1 (2022-06-23 ucrt)
#> os Windows 10 x64 (build 19043)
#> system x86_64, mingw32
#> ui RTerm
#> language (EN)
#> collate English_United States.utf8
#> ctype English_United States.utf8
#> tz America/New_York
#> date 2022-10-30
#> pandoc 2.18 @ C:/Program Files/RStudio/bin/quarto/bin/tools/ (via rmarkdown)
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────────────────────────
#> package * version date (UTC) lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.2.1)
#> backports 1.4.1 2021-12-13 [1] CRAN (R 4.2.0)
#> broom 1.0.1 2022-08-29 [1] CRAN (R 4.2.1)
#> bslib 0.4.0 2022-07-16 [1] CRAN (R 4.2.1)
#> cachem 1.0.6 2021-08-19 [1] CRAN (R 4.2.1)
#> callr 3.7.2 2022-08-22 [1] CRAN (R 4.2.1)
#> cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.2.1)
#> cli 3.3.0 2022-04-25 [1] CRAN (R 4.2.1)
#> colorspace 2.0-3 2022-02-21 [1] CRAN (R 4.2.1)
#> crayon 1.5.1 2022-03-26 [1] CRAN (R 4.2.1)
#> DBI 1.1.3 2022-06-18 [1] CRAN (R 4.2.1)
#> dbplyr 2.2.1 2022-06-27 [1] CRAN (R 4.2.1)
#> devtools 2.4.4 2022-07-20 [1] CRAN (R 4.2.1)
#> digest 0.6.29 2021-12-01 [1] CRAN (R 4.2.1)
#> dplyr * 1.0.9 2022-04-28 [1] CRAN (R 4.2.1)
#> ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.2.1)
#> evaluate 0.16 2022-08-09 [1] CRAN (R 4.2.1)
#> fansi 1.0.3 2022-03-24 [1] CRAN (R 4.2.1)
#> farver 2.1.1 2022-07-06 [1] CRAN (R 4.2.1)
#> fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.2.1)
#> forcats * 0.5.2 2022-08-19 [1] CRAN (R 4.2.1)
#> fs 1.5.2 2021-12-08 [1] CRAN (R 4.2.1)
#> gargle 1.2.0 2021-07-02 [1] CRAN (R 4.2.1)
#> generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.1)
#> ggplot2 * 3.3.6 2022-05-03 [1] CRAN (R 4.2.1)
#> glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.1)
#> googledrive 2.0.0 2021-07-08 [1] CRAN (R 4.2.1)
#> googlesheets4 1.0.1 2022-08-13 [1] CRAN (R 4.2.1)
#> gtable 0.3.0 2019-03-25 [1] CRAN (R 4.2.1)
#> haven 2.5.1 2022-08-22 [1] CRAN (R 4.2.1)
#> highr 0.9 2021-04-16 [1] CRAN (R 4.2.1)
#> hms 1.1.2 2022-08-19 [1] CRAN (R 4.2.1)
#> htmltools 0.5.3 2022-07-18 [1] CRAN (R 4.2.1)
#> htmlwidgets 1.5.4 2021-09-08 [1] CRAN (R 4.2.1)
#> httpuv 1.6.5 2022-01-05 [1] CRAN (R 4.2.1)
#> httr 1.4.4 2022-08-17 [1] CRAN (R 4.2.1)
#> infer * 1.0.3 2022-08-22 [1] CRAN (R 4.2.1)
#> jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.2.1)
#> jsonlite 1.8.0 2022-02-22 [1] CRAN (R 4.2.1)
#> knitr 1.40 2022-08-24 [1] CRAN (R 4.2.1)
#> labeling 0.4.2 2020-10-20 [1] CRAN (R 4.2.0)
#> later 1.3.0 2021-08-18 [1] CRAN (R 4.2.1)
#> lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.2.1)
#> lubridate 1.8.0 2021-10-07 [1] CRAN (R 4.2.1)
#> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.1)
#> memoise 2.0.1 2021-11-26 [1] CRAN (R 4.2.1)
#> mime 0.12 2021-09-28 [1] CRAN (R 4.2.0)
#> miniUI 0.1.1.1 2018-05-18 [1] CRAN (R 4.2.1)
#> modelr 0.1.9 2022-08-19 [1] CRAN (R 4.2.1)
#> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.2.1)
#> pillar 1.8.1 2022-08-19 [1] CRAN (R 4.2.1)
#> pkgbuild 1.3.1 2021-12-20 [1] CRAN (R 4.2.1)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.1)
#> pkgload 1.3.0 2022-06-27 [1] CRAN (R 4.2.1)
#> prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.2.1)
#> processx 3.7.0 2022-07-07 [1] CRAN (R 4.2.1)
#> profvis 0.3.7 2020-11-02 [1] CRAN (R 4.2.1)
#> promises 1.2.0.1 2021-02-11 [1] CRAN (R 4.2.1)
#> ps 1.7.1 2022-06-18 [1] CRAN (R 4.2.1)
#> purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.2.1)
#> R6 2.5.1 2021-08-19 [1] CRAN (R 4.2.1)
#> Rcpp 1.0.9 2022-07-08 [1] CRAN (R 4.2.1)
#> readr * 2.1.2 2022-01-30 [1] CRAN (R 4.2.1)
#> readxl 1.4.1 2022-08-17 [1] CRAN (R 4.2.1)
#> remotes 2.4.2 2021-11-30 [1] CRAN (R 4.2.1)
#> reprex 2.0.2 2022-08-17 [1] CRAN (R 4.2.1)
#> rlang 1.0.4 2022-07-12 [1] CRAN (R 4.2.1)
#> rmarkdown 2.16 2022-08-24 [1] CRAN (R 4.2.1)
#> rstudioapi 0.14 2022-08-22 [1] CRAN (R 4.2.1)
#> rvest 1.0.3 2022-08-19 [1] CRAN (R 4.2.1)
#> sass 0.4.2 2022-07-16 [1] CRAN (R 4.2.1)
#> scales 1.2.1 2022-08-20 [1] CRAN (R 4.2.1)
#> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.2.1)
#> shiny 1.7.2 2022-07-19 [1] CRAN (R 4.2.1)
#> stringi 1.7.8 2022-07-11 [1] CRAN (R 4.2.1)
#> stringr * 1.4.1 2022-08-20 [1] CRAN (R 4.2.1)
#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.1)
#> tidyr * 1.2.0 2022-02-01 [1] CRAN (R 4.2.1)
#> tidyselect 1.1.2 2022-02-21 [1] CRAN (R 4.2.1)
#> tidyverse * 1.3.2 2022-07-18 [1] CRAN (R 4.2.1)
#> tzdb 0.3.0 2022-03-28 [1] CRAN (R 4.2.1)
#> urlchecker 1.0.1 2021-11-30 [1] CRAN (R 4.2.1)
#> usethis 2.1.6 2022-05-25 [1] CRAN (R 4.2.1)
#> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.2.1)
#> vctrs 0.4.1 2022-04-13 [1] CRAN (R 4.2.1)
#> withr 2.5.0 2022-03-03 [1] CRAN (R 4.2.1)
#> xfun 0.32 2022-08-10 [1] CRAN (R 4.2.1)
#> xml2 1.3.3 2021-11-30 [1] CRAN (R 4.2.1)
#> xtable 1.8-4 2019-04-21 [1] CRAN (R 4.2.1)
#> yaml 2.3.5 2022-02-21 [1] CRAN (R 4.2.0)
#>
#> [1] C:/Users/enidr/AppData/Local/R/win-library/4.2
#> [2] C:/Program Files/R/R-4.2.1/library
#>
#> ──────────────────────────────────────────────────────────────────────────────────────────────────