#Mini Project by: Christopher Hamilton, Jack Piscotta, and Victoria Pitonzo

#Question 1

# Load the built-in `cars` data set and print the median of its `speed` column.
data("cars")
median(cars[["speed"]])
## [1] 15

#Question 2

library(jsonlite)
# Fetch BTC/USD history from the CryptoCompare `histoday` endpoint (limit=99)
# and print the highest value found in the `close` column of the returned rows.
url <- "https://min-api.cryptocompare.com/data/v2/histoday?fsym=BTC&tsym=USD&limit=99"
raw <- fromJSON(url)
dat <- raw$Data$Data

# Fail loudly when the response does not carry the expected table. Without this
# guard, max() on a missing or all-NA column only warns and yields -Inf, which
# would then be reported as if it were a price.
if (!is.data.frame(dat) || nrow(dat) == 0L || !"close" %in% names(dat) ||
    all(is.na(dat$close))) {
  stop(
    "Unexpected response from ", url, ": no usable 'close' prices returned.",
    call. = FALSE
  )
}

max_close <- max(dat$close, na.rm = TRUE)
max_close
# NOTE(review): the value below was recorded on the original run; the endpoint
# presumably serves live data, so a re-run is expected to print something else.
## [1] 124723

# Question 3
#
# Part 1 - Project Title/Topic:
#   Crash Course America: Mapping the States with the Worst Drivers
#
# Part 2 - Research Questions:
#   - Which U.S. states have the highest number of drivers involved in fatal
#     collisions per billion miles?
#   - How do factors like speeding, alcohol use, and distracted driving
#     correlate with fatal accident rates?
#   - Is there a relationship between insurance losses per insured driver and
#     bad driving behavior?
#
# Part 3 - Data Source:
#   The project uses the bad_drivers dataset from the fivethirtyeight R
#   package, which originates from the article “Which State Has the Worst
#   Drivers?” published by FiveThirtyEight.
#   Link: https://fivethirtyeight.com/features/which-state-has-the-worst-drivers/
#
# Part 4:

# install.packages("fivethirtyeight")
library(fivethirtyeight)
## Some larger datasets need to be installed separately, like senators and
## house_district_forecast. To install these, we recommend you install the
## fivethirtyeightdata package by running:
## install.packages('fivethirtyeightdata', repos =
## 'https://fivethirtyeightdata.github.io/drat/', type = 'source')
# Load the `bad_drivers` data set into the workspace, then print its structure
# (column names, types and leading values).
data(list = "bad_drivers")
str(object = bad_drivers)
## Classes 'tbl_df', 'tbl' and 'data.frame':    51 obs. of  8 variables:
##  $ state              : chr  "Alabama" "Alaska" "Arizona" "Arkansas" ...
##  $ num_drivers        : num  18.8 18.1 18.6 22.4 12 13.6 10.8 16.2 5.9 17.9 ...
##  $ perc_speeding      : int  39 41 35 18 35 37 46 38 34 21 ...
##  $ perc_alcohol       : int  30 25 28 26 28 28 36 30 27 29 ...
##  $ perc_not_distracted: int  96 90 84 94 91 79 87 87 100 92 ...
##  $ perc_no_previous   : int  80 94 96 95 89 95 82 99 100 94 ...
##  $ insurance_premiums : num  785 1053 899 827 878 ...
##  $ losses             : num  145 134 110 142 166 ...
##  - attr(*, "spec")=List of 2
##   ..$ cols   :List of 8
##   .. ..$ State                                                                                                 : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
##   .. ..$ Number of drivers involved in fatal collisions per billion miles                                      : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
##   .. ..$ Percentage Of Drivers Involved In Fatal Collisions Who Were Speeding                                  : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
##   .. ..$ Percentage Of Drivers Involved In Fatal Collisions Who Were Alcohol-Impaired                          : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
##   .. ..$ Percentage Of Drivers Involved In Fatal Collisions Who Were Not Distracted                            : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
##   .. ..$ Percentage Of Drivers Involved In Fatal Collisions Who Had Not Been Involved In Any Previous Accidents: list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
##   .. ..$ Car Insurance Premiums ($)                                                                            : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
##   .. ..$ Losses incurred by insurance companies for collisions per insured driver ($)                          : list()
##   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
##   ..$ default: list()
##   .. ..- attr(*, "class")= chr [1:2] "collector_guess" "collector"
##   ..- attr(*, "class")= chr "col_spec"
# Preview the first six rows of the raw table (six is head()'s default).
head(bad_drivers, n = 6L)
##        state num_drivers perc_speeding perc_alcohol perc_not_distracted
## 1    Alabama        18.8            39           30                  96
## 2     Alaska        18.1            41           25                  90
## 3    Arizona        18.6            35           28                  84
## 4   Arkansas        22.4            18           26                  94
## 5 California        12.0            35           28                  91
## 6   Colorado        13.6            37           28                  79
##   perc_no_previous insurance_premiums losses
## 1               80             784.55 145.08
## 2               94            1053.48 133.93
## 3               96             899.47 110.35
## 4               95             827.34 142.39
## 5               89             878.41 165.63
## 6               95             835.50 139.91

# Part 5:

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Build the analysis table from bad_drivers:
#   * Distracted is derived as 100 - perc_not_distracted, i.e. the complement
#     of the "not distracted" percentage.
#   * The remaining source columns are carried over under descriptive names.
#   * Total_Risk_Behavior is the plain sum of the Speeding, Alcohol and
#     Distracted percentages.
#   * Rows are sorted by Fatal_Collisions (highest first) and Rank stores the
#     resulting row position, so tied states still receive consecutive ranks.
bad_drivers_clean <- bad_drivers %>%
  mutate(Distracted = 100 - perc_not_distracted) %>%
  select(
    State = state,
    Fatal_Collisions = num_drivers,
    Speeding = perc_speeding,
    Alcohol = perc_alcohol,
    Distracted,
    No_Previous = perc_no_previous,
    Insurance_Premiums = insurance_premiums,
    Insurance_Losses = losses
  ) %>%
  mutate(Total_Risk_Behavior = Speeding + Alcohol + Distracted) %>%
  arrange(desc(Fatal_Collisions)) %>%
  mutate(Rank = row_number())


# Print per-column summary statistics for the cleaned table.
bad_drivers_clean %>% summary()
##     State           Fatal_Collisions    Speeding        Alcohol     
##  Length:51          Min.   : 5.90    Min.   :13.00   Min.   :16.00  
##  Class :character   1st Qu.:12.75    1st Qu.:23.00   1st Qu.:28.00  
##  Mode  :character   Median :15.60    Median :34.00   Median :30.00  
##                     Mean   :15.79    Mean   :31.73   Mean   :30.69  
##                     3rd Qu.:18.50    3rd Qu.:38.00   3rd Qu.:33.00  
##                     Max.   :23.90    Max.   :54.00   Max.   :44.00  
##    Distracted     No_Previous     Insurance_Premiums Insurance_Losses
##  Min.   : 0.00   Min.   : 76.00   Min.   : 642.0     Min.   : 82.75  
##  1st Qu.: 5.00   1st Qu.: 83.50   1st Qu.: 768.4     1st Qu.:114.64  
##  Median :12.00   Median : 88.00   Median : 859.0     Median :136.05  
##  Mean   :14.08   Mean   : 88.73   Mean   : 887.0     Mean   :134.49  
##  3rd Qu.:17.00   3rd Qu.: 95.00   3rd Qu.:1007.9     3rd Qu.:151.87  
##  Max.   :90.00   Max.   :100.00   Max.   :1301.5     Max.   :194.78  
##  Total_Risk_Behavior      Rank     
##  Min.   : 45.00      Min.   : 1.0  
##  1st Qu.: 64.00      1st Qu.:13.5  
##  Median : 74.00      Median :26.0  
##  Mean   : 76.49      Mean   :26.0  
##  3rd Qu.: 85.00      3rd Qu.:38.5  
##  Max.   :136.00      Max.   :51.0
# Count missing cells across the whole cleaned table.
bad_drivers_clean %>% is.na() %>% sum()
## [1] 0
# Show the first five rows; the table is already sorted by descending
# Fatal_Collisions, so these are the five highest values.
head(bad_drivers_clean, n = 5)
## # A tibble: 5 × 10
##   State          Fatal_Collisions Speeding Alcohol Distracted No_Previous
##   <chr>                     <dbl>    <int>   <int>      <dbl>       <int>
## 1 North Dakota               23.9       23      42          1          86
## 2 South Carolina             23.9       38      41          4          81
## 3 West Virginia              23.8       34      28          3          87
## 4 Arkansas                   22.4       18      26          6          95
## 5 Kentucky                   21.4       19      23         22          76
## # ℹ 4 more variables: Insurance_Premiums <dbl>, Insurance_Losses <dbl>,
## #   Total_Risk_Behavior <dbl>, Rank <int>