Load and summarize the built-in cars data set
# Load the built-in `cars` data set and print summary statistics for each
# of its columns.
data(list = "cars")
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
Maximum daily closing price of BTC (in USD)
#install.packages("jsonlite")
library(jsonlite)

# Endpoint queried for the daily BTC history priced in USD.
BTCurl <- "https://min-api.cryptocompare.com/data/v2/histoday?fsym=BTC&tsym=USD&limit=100"

# Download the payload once; the original issued the same request twice
# (once for BTC2, once for BTC).
btc_response <- fromJSON(BTCurl)

# Reach the table of daily records by name rather than by position
# ([[6]][[4]]), so an added or reordered field in the response cannot
# silently select the wrong element.
BTC <- btc_response[["Data"]][["Data"]]
if (!is.data.frame(BTC) || !"close" %in% names(BTC)) {
  stop("Unexpected response from ", BTCurl, call. = FALSE)
}

# Kept as an alias so any later code that refers to BTC2 still works.
BTC2 <- BTC
#==============================================================================
# Highest daily closing price in the downloaded window.
max(BTC$close)
## [1] 96945.09
Installing packages and loading data:
# Install nascaR.data from GitHub; per the output below, the install is
# skipped when the remote SHA1 has not changed since the last install.
remotes::install_github("kyleGrealis/nascaR.data")
## Using GitHub PAT from the git credential store.
## Skipping install of 'nascaR.data' from a github remote, the SHA1 (00ce6e30) has not changed since last install.
## Use `force = TRUE` to force installation
#install.packages('nascaR.data')
library("nascaR.data")
# Load the "cup" series and print a per-column summary of it.
cup <- load_series("cup")
summary(cup)
## Season Race Track Name
## Min. :1949 Min. : 1.0 Length:100254 Length:100254
## 1st Qu.:1969 1st Qu.: 9.0 Class :character Class :character
## Median :1990 Median :18.0 Mode :character Mode :character
## Mean :1989 Mean :19.4
## 3rd Qu.:2008 3rd Qu.:28.0
## Max. :2026 Max. :62.0
##
## Length Surface Finish Start
## Min. :0.200 Length:100254 Min. : 1.00 Min. : 0.00
## 1st Qu.:0.548 Class :character 1st Qu.: 9.00 1st Qu.: 9.00
## Median :1.366 Mode :character Median :18.00 Median :18.00
## Mean :1.386 Mean :19.35 Mean :19.42
## 3rd Qu.:2.000 3rd Qu.:29.00 3rd Qu.:29.00
## Max. :4.170 Max. :82.00 Max. :82.00
## NA's :115 NA's :3079
## Car Driver Make Pts
## Length:100254 Length:100254 Length:100254 Min. : 0.0
## Class :character Class :character Class :character 1st Qu.: 35.0
## Mode :character Mode :character Mode :character Median : 79.0
## Mean : 121.6
## 3rd Qu.: 129.0
## Max. :6500.0
## NA's :10196
## Laps Led Status Team
## Min. : 0.0 Min. : 0.000 Length:100254 Length:100254
## 1st Qu.:135.0 1st Qu.: 0.000 Class :character Class :character
## Median :200.0 Median : 0.000 Mode :character Mode :character
## Mean :235.4 Mean : 7.531
## 3rd Qu.:333.0 3rd Qu.: 0.000
## Max. :515.0 Max. :495.000
## NA's :2239 NA's :1
## S1 S2 S3 Rating
## Min. : 1.0 Min. : 1.0 Min. : NA Min. : 0.0
## 1st Qu.: 3.0 1st Qu.: 3.0 1st Qu.: NA 1st Qu.: 46.7
## Median : 5.5 Median : 5.5 Median : NA Median : 67.4
## Mean : 5.5 Mean : 5.5 Mean :NaN Mean : 69.5
## 3rd Qu.: 8.0 3rd Qu.: 8.0 3rd Qu.: NA 3rd Qu.: 89.2
## Max. :10.0 Max. :10.0 Max. : NA Max. :150.0
## NA's :97344 NA's :97344 NA's :100254 NA's :69331
## Win
## Min. :0.00000
## 1st Qu.:0.00000
## Median :0.00000
## Mean :0.02818
## 3rd Qu.:0.00000
## Max. :1.00000
##
library(forcats)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forcats)
library(scales)
# Step 1: keep only the rows for a first-place finish.
# Column names in this package are capitalized ('Finish', 'Driver').
all_wins <- cup[which(cup$Finish == 1), ]
# Step 2: tabulate how many of those rows belong to each driver.
win_leaderboard <- table(all_wins$Driver)
# Step 3: find the largest win count and every driver who reaches it
# (more than one name is returned if there is a tie).
max_wins <- max(win_leaderboard)
winningest_driver <- names(which(win_leaderboard == max_wins))
# Step 4: report the driver name(s) and the win total.
cat("The most winningest NASCAR driver of all time is:", winningest_driver, "\n")
## The most winningest NASCAR driver of all time is: Richard Petty
cat("Total Career Wins:", max_wins)
## Total Career Wins: 200
#install.packages("tidyr")
library(tidyr)
# Drop the rows of `cup` whose `Length` is missing, then print a
# per-column summary of what remains.
df_clean <- drop_na(cup, Length)
summary(df_clean)
## Season Race Track Name
## Min. :1949 Min. : 1.00 Length:100139 Length:100139
## 1st Qu.:1969 1st Qu.: 9.00 Class :character Class :character
## Median :1990 Median :19.00 Mode :character Mode :character
## Mean :1989 Mean :19.41
## 3rd Qu.:2008 3rd Qu.:28.00
## Max. :2026 Max. :62.00
##
## Length Surface Finish Start
## Min. :0.200 Length:100139 Min. : 1.00 Min. : 0.00
## 1st Qu.:0.548 Class :character 1st Qu.: 9.00 1st Qu.: 9.00
## Median :1.366 Mode :character Median :18.00 Median :18.00
## Mean :1.386 Mean :19.35 Mean :19.42
## 3rd Qu.:2.000 3rd Qu.:29.00 3rd Qu.:29.00
## Max. :4.170 Max. :82.00 Max. :82.00
## NA's :3079
## Car Driver Make Pts
## Length:100139 Length:100139 Length:100139 Min. : 0.0
## Class :character Class :character Class :character 1st Qu.: 35.0
## Mode :character Mode :character Mode :character Median : 80.0
## Mean : 121.7
## 3rd Qu.: 129.0
## Max. :6500.0
## NA's :10196
## Laps Led Status Team
## Min. : 0.0 Min. : 0.000 Length:100139 Length:100139
## 1st Qu.:135.0 1st Qu.: 0.000 Class :character Class :character
## Median :200.0 Median : 0.000 Mode :character Mode :character
## Mean :235.5 Mean : 7.534
## 3rd Qu.:333.0 3rd Qu.: 0.000
## Max. :515.0 Max. :495.000
## NA's :2239 NA's :1
## S1 S2 S3 Rating
## Min. : 1.0 Min. : 1.0 Min. : NA Min. : 0.0
## 1st Qu.: 3.0 1st Qu.: 3.0 1st Qu.: NA 1st Qu.: 46.7
## Median : 5.5 Median : 5.5 Median : NA Median : 67.4
## Mean : 5.5 Mean : 5.5 Mean :NaN Mean : 69.5
## 3rd Qu.: 8.0 3rd Qu.: 8.0 3rd Qu.: NA 3rd Qu.: 89.2
## Max. :10.0 Max. :10.0 Max. : NA Max. :150.0
## NA's :97239 NA's :97239 NA's :100139 NA's :69294
## Win
## Min. :0.00000
## 1st Qu.:0.00000
## Median :0.00000
## Mean :0.02818
## 3rd Qu.:0.00000
## Max. :1.00000
##
# Largest value of `Length` in the cleaned data.
max_length <- max(df_clean$Length, na.rm = TRUE)
# Flag the rows at that maximum and collect the distinct track names
# among them.
is_longest <- df_clean$Length == max_length
longest_track_name <- unique(df_clean$Track[is_longest])
cat("The longest NASCAR track is:", longest_track_name, "\n")
## The longest NASCAR track is: Daytona Beach & Road Course
cat("Track Length", max_length, "miles")
## Track Length 4.17 miles
# Total laps per driver, keeping the ten drivers with the highest totals.
# Missing `Laps` values are ignored when summing.
most_laps_driver <- cup %>%
  group_by(Driver) %>%
  summarise(total_laps_run = sum(Laps, na.rm = TRUE)) %>%
  arrange(desc(total_laps_run)) %>%
  # Keep the first ten rows after sorting (not just the single top driver);
  # the bar chart built from this table plots all ten.
  slice_head(n = 10)
# Print the resulting top-10 table
print(most_laps_driver)
## # A tibble: 10 × 2
## Driver total_laps_run
## <chr> <int>
## 1 Richard Petty 307846
## 2 Ricky Rudd 259744
## 3 Mark Martin 255043
## 4 Terry Labonte 254803
## 5 Bill Elliott 238923
## 6 Darrell Waltrip 237774
## 7 Kyle Petty 232575
## 8 Kevin Harvick 232413
## 9 Dave Marcis 231382
## 10 Jeff Gordon 231223
# Horizontal bar chart of the top-10 lap totals, with drivers ordered by
# their total and each bar labelled with its comma-formatted value.
ggplot(
  most_laps_driver,
  aes(x = fct_reorder(Driver, total_laps_run), y = total_laps_run)
) +
  geom_text(
    aes(label = comma(total_laps_run)),
    hjust = -0.1,
    size = 3.5,
    color = "black"
  ) +
  geom_col(fill = "steelblue") +
  # Flip the axes so driver names are easier to read, and restrict the
  # visible value range to 225,000-310,000 laps.
  coord_flip(ylim = c(225000, 310000)) +
  ggtitle("Top 10 NASCAR Cup Series Drivers by Laps Ran") +
  xlab("Driver") +
  ylab("Total Laps Ran") +
  theme_minimal()
# Number of winning rows (Win == 1) per driver at Daytona International
# Speedway, reduced to the ten drivers with the most wins.
daytona_wins <- cup %>%
  filter(Track == "Daytona International Speedway", Win == 1) %>%
  count(Driver, name = "Wins") %>%
  arrange(desc(Wins)) %>%
  slice_head(n = 10)
# Horizontal bar chart of the Daytona win counts, with drivers ordered by
# wins and each bar labelled with its value.
ggplot(daytona_wins, aes(x = reorder(Driver, Wins), y = Wins)) +
  geom_col(fill = "#136011") +
  geom_text(
    aes(label = comma(Wins)),
    hjust = -0.1,
    size = 3.5,
    color = "black"
  ) +
  coord_flip() +
  ggtitle(
    "Top 10 Cup Series Winners at Daytona",
    subtitle = "Includes Daytona 500 and summer races"
  ) +
  xlab("Driver") +
  ylab("Number of Wins") +
  theme_minimal()