#Mini Project by: Christopher Hamilton, Jack Piscotta, and Victoria Pitonzo
#Question 1
# Load the built-in cars dataset and report the median of its speed column.
data("cars")
median(cars[["speed"]])
## [1] 15
#Question 2
library(jsonlite)
# Fetch daily BTC/USD history (the URL requests limit=99) and report the
# highest closing price found in the returned records.
url <- "https://min-api.cryptocompare.com/data/v2/histoday?fsym=BTC&tsym=USD&limit=99"
raw <- fromJSON(url)
# The per-day records are read from the nested Data$Data element.
dat <- raw$Data$Data
# Fail loudly when no usable records came back: otherwise max() on an
# empty/NULL column would quietly return -Inf with only a warning.
if (!is.data.frame(dat) || nrow(dat) == 0L || !is.numeric(dat$close)) {
  stop(
    "No daily closing prices were returned by the API",
    if (is.character(raw$Message)) paste0(": ", raw$Message[1]) else "",
    call. = FALSE
  )
}
max_close <- max(dat$close, na.rm = TRUE)
max_close
## [1] 124723
#Question 3
#Part 1 - Project Title/Topic: Crash Course America: Mapping the States with the Worst Drivers
#Part 2 - Research Questions:
# - Which U.S. states have the highest number of drivers involved in fatal collisions per billion miles?
# - How do factors like speeding, alcohol use, and distracted driving correlate with fatal accident rates?
# - Is there a relationship between insurance losses per insured driver and bad driving behavior?
#Part 3 - Data Source: The project uses the bad_drivers dataset from the fivethirtyeight R package, which originates from the article “Which State Has the Worst Drivers?” published by FiveThirtyEight. Link: https://fivethirtyeight.com/features/which-state-has-the-worst-drivers/
#Part 4:
# install.packages("fivethirtyeight")
library(fivethirtyeight)
## Some larger datasets need to be installed separately, like senators and
## house_district_forecast. To install these, we recommend you install the
## fivethirtyeightdata package by running:
## install.packages('fivethirtyeightdata', repos =
## 'https://fivethirtyeightdata.github.io/drat/', type = 'source')
# Load the bad_drivers dataset into the workspace and print its structure.
data(bad_drivers)
str(object = bad_drivers)
## Classes 'tbl_df', 'tbl' and 'data.frame': 51 obs. of 8 variables:
## $ state : chr "Alabama" "Alaska" "Arizona" "Arkansas" ...
## $ num_drivers : num 18.8 18.1 18.6 22.4 12 13.6 10.8 16.2 5.9 17.9 ...
## $ perc_speeding : int 39 41 35 18 35 37 46 38 34 21 ...
## $ perc_alcohol : int 30 25 28 26 28 28 36 30 27 29 ...
## $ perc_not_distracted: int 96 90 84 94 91 79 87 87 100 92 ...
## $ perc_no_previous : int 80 94 96 95 89 95 82 99 100 94 ...
## $ insurance_premiums : num 785 1053 899 827 878 ...
## $ losses : num 145 134 110 142 166 ...
## - attr(*, "spec")=List of 2
## ..$ cols :List of 8
## .. ..$ State : list()
## .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
## .. ..$ Number of drivers involved in fatal collisions per billion miles : list()
## .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
## .. ..$ Percentage Of Drivers Involved In Fatal Collisions Who Were Speeding : list()
## .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
## .. ..$ Percentage Of Drivers Involved In Fatal Collisions Who Were Alcohol-Impaired : list()
## .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
## .. ..$ Percentage Of Drivers Involved In Fatal Collisions Who Were Not Distracted : list()
## .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
## .. ..$ Percentage Of Drivers Involved In Fatal Collisions Who Had Not Been Involved In Any Previous Accidents: list()
## .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
## .. ..$ Car Insurance Premiums ($) : list()
## .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
## .. ..$ Losses incurred by insurance companies for collisions per insured driver ($) : list()
## .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
## ..$ default: list()
## .. ..- attr(*, "class")= chr [1:2] "collector_guess" "collector"
## ..- attr(*, "class")= chr "col_spec"
# Preview the first six rows of the raw dataset.
head(bad_drivers, n = 6L)
## state num_drivers perc_speeding perc_alcohol perc_not_distracted
## 1 Alabama 18.8 39 30 96
## 2 Alaska 18.1 41 25 90
## 3 Arizona 18.6 35 28 84
## 4 Arkansas 22.4 18 26 94
## 5 California 12.0 35 28 91
## 6 Colorado 13.6 37 28 79
## perc_no_previous insurance_premiums losses
## 1 80 784.55 145.08
## 2 94 1053.48 133.93
## 3 96 899.47 110.35
## 4 95 827.34 142.39
## 5 89 878.41 165.63
## 6 95 835.50 139.91
#Part 5:
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Build the cleaned analysis table from bad_drivers:
#   * keep only the renamed columns listed below,
#   * derive Distracted and Total_Risk_Behavior,
#   * sort by Fatal_Collisions (highest first) and number the rows as Rank.
bad_drivers_clean <- bad_drivers %>%
  transmute(
    State = state,
    Fatal_Collisions = num_drivers,
    Speeding = perc_speeding,
    Alcohol = perc_alcohol,
    # The source column holds the share NOT distracted, so flip it.
    Distracted = 100 - perc_not_distracted,
    No_Previous = perc_no_previous,
    Insurance_Premiums = insurance_premiums,
    Insurance_Losses = losses
  ) %>%
  # Plain sum of the three behaviour percentages.
  mutate(Total_Risk_Behavior = Speeding + Alcohol + Distracted) %>%
  arrange(-Fatal_Collisions) %>%
  # Rank is the row position after sorting, so tied values still receive
  # distinct consecutive ranks.
  mutate(Rank = seq_len(n()))
# Per-column summary statistics of the cleaned table.
summary(object = bad_drivers_clean)
## State Fatal_Collisions Speeding Alcohol
## Length:51 Min. : 5.90 Min. :13.00 Min. :16.00
## Class :character 1st Qu.:12.75 1st Qu.:23.00 1st Qu.:28.00
## Mode :character Median :15.60 Median :34.00 Median :30.00
## Mean :15.79 Mean :31.73 Mean :30.69
## 3rd Qu.:18.50 3rd Qu.:38.00 3rd Qu.:33.00
## Max. :23.90 Max. :54.00 Max. :44.00
## Distracted No_Previous Insurance_Premiums Insurance_Losses
## Min. : 0.00 Min. : 76.00 Min. : 642.0 Min. : 82.75
## 1st Qu.: 5.00 1st Qu.: 83.50 1st Qu.: 768.4 1st Qu.:114.64
## Median :12.00 Median : 88.00 Median : 859.0 Median :136.05
## Mean :14.08 Mean : 88.73 Mean : 887.0 Mean :134.49
## 3rd Qu.:17.00 3rd Qu.: 95.00 3rd Qu.:1007.9 3rd Qu.:151.87
## Max. :90.00 Max. :100.00 Max. :1301.5 Max. :194.78
## Total_Risk_Behavior Rank
## Min. : 45.00 Min. : 1.0
## 1st Qu.: 64.00 1st Qu.:13.5
## Median : 74.00 Median :26.0
## Mean : 76.49 Mean :26.0
## 3rd Qu.: 85.00 3rd Qu.:38.5
## Max. :136.00 Max. :51.0
# Total number of missing values across all columns of the cleaned table.
sum(
  vapply(
    bad_drivers_clean,
    function(column) sum(is.na(column)),
    integer(1)
  )
)
## [1] 0
# Show the five top-ranked rows of the cleaned table.
head(bad_drivers_clean, n = 5L)
## # A tibble: 5 × 10
## State Fatal_Collisions Speeding Alcohol Distracted No_Previous
## <chr> <dbl> <int> <int> <dbl> <int>
## 1 North Dakota 23.9 23 42 1 86
## 2 South Carolina 23.9 38 41 4 81
## 3 West Virginia 23.8 34 28 3 87
## 4 Arkansas 22.4 18 26 6 95
## 5 Kentucky 21.4 19 23 22 76
## # ℹ 4 more variables: Insurance_Premiums <dbl>, Insurance_Losses <dbl>,
## # Total_Risk_Behavior <dbl>, Rank <int>