Importing libraries

library(readr) #importing data files
library(dplyr) #used for data manipulation
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr) #used for unite function to combine cols
library(tidyverse) #using for merge function
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ purrr     1.0.1
## ✔ ggplot2   3.4.3     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(psych) #for describing data frame : similar to summary
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(ggplot2)
library(rAmCharts4)

Importing CSV files

raw_circuits <- read_csv("C:/Users/kesha/Desktop/DataSets/f1 data/circuits.csv")
## Rows: 77 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): circuitRef, name, location, country, alt, url
## dbl (3): circuitId, lat, lng
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
raw_constructor_results <- read_csv("C:/Users/kesha/Desktop/DataSets/f1 data/constructor_results.csv")
## Rows: 12170 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): status
## dbl (4): constructorResultsId, raceId, constructorId, points
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
raw_constructor_standings <- read_csv("C:/Users/kesha/Desktop/DataSets/f1 data/constructor_standings.csv")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 12941 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (7): constructorStandingsId, raceId, constructorId, points, position, po...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
raw_constructors <- read_csv("C:/Users/kesha/Desktop/DataSets/f1 data/constructors.csv")
## Rows: 211 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): constructorRef, name, nationality, url
## dbl (1): constructorId
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
raw_driver_standings <- read_csv("C:/Users/kesha/Desktop/DataSets/f1 data/driver_standings.csv")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 33902 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (7): driverStandingsId, raceId, driverId, points, position, positionText...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
raw_drivers <- read_csv("C:/Users/kesha/Desktop/DataSets/f1 data/drivers.csv")
## Rows: 857 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (7): driverRef, number, code, forename, surname, nationality, url
## dbl  (1): driverId
## date (1): dob
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
raw_lap_times <- read_csv("C:/Users/kesha/Desktop/DataSets/f1 data/lap_times.csv")
## Rows: 538121 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (5): raceId, driverId, lap, position, milliseconds
## time (1): time
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
raw_pit_stops <- read_csv("C:/Users/kesha/Desktop/DataSets/f1 data/pit_stops.csv")
## Rows: 9634 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): duration
## dbl  (5): raceId, driverId, stop, lap, milliseconds
## time (1): time
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
raw_qyalifying <- read_csv("C:/Users/kesha/Desktop/DataSets/f1 data/qualifying.csv")
## Rows: 9575 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): q1, q2, q3
## dbl (6): qualifyId, raceId, driverId, constructorId, number, position
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
raw_races <- read_csv("C:/Users/kesha/Desktop/DataSets/f1 data/races.csv")
## Rows: 1102 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (14): name, date, time, url, fp1_date, fp1_time, fp2_date, fp2_time, fp3...
## dbl  (4): raceId, year, round, circuitId
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
raw_results <- read_csv("C:/Users/kesha/Desktop/DataSets/f1 data/results.csv")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 25840 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (8): position, positionText, time, milliseconds, fastestLap, rank, fast...
## dbl (10): resultId, raceId, driverId, constructorId, number, grid, positionO...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
raw_seasons <- read_csv("C:/Users/kesha/Desktop/DataSets/f1 data/seasons.csv")
## Rows: 74 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): url
## dbl (1): year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
raw_sprint_results <- read_csv("C:/Users/kesha/Desktop/DataSets/f1 data/sprint_results.csv")
## Rows: 120 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (6): position, positionText, time, milliseconds, fastestLap, fastestLap...
## dbl (10): resultId, raceId, driverId, constructorId, number, grid, positionO...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
raw_status <- read_csv("C:/Users/kesha/Desktop/DataSets/f1 data/status.csv")
## Rows: 139 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): status
## dbl (1): statusId
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Viewing the dataset

View(raw_drivers)

Checking if dataset is loaded properly

head(raw_results,n=20)
## # A tibble: 20 × 18
##    resultId raceId driverId constructorId number  grid position positionText
##       <dbl>  <dbl>    <dbl>         <dbl>  <dbl> <dbl> <chr>    <chr>       
##  1        1     18        1             1     22     1 "1"      1           
##  2        2     18        2             2      3     5 "2"      2           
##  3        3     18        3             3      7     7 "3"      3           
##  4        4     18        4             4      5    11 "4"      4           
##  5        5     18        5             1     23     3 "5"      5           
##  6        6     18        6             3      8    13 "6"      6           
##  7        7     18        7             5     14    17 "7"      7           
##  8        8     18        8             6      1    15 "8"      8           
##  9        9     18        9             2      4     2 "\\N"    R           
## 10       10     18       10             7     12    18 "\\N"    R           
## 11       11     18       11             8     18    19 "\\N"    R           
## 12       12     18       12             4      6    20 "\\N"    R           
## 13       13     18       13             6      2     4 "\\N"    R           
## 14       14     18       14             9      9     8 "\\N"    R           
## 15       15     18       15             7     11     6 "\\N"    R           
## 16       16     18       16            10     20    22 "\\N"    R           
## 17       17     18       17             9     10    14 "\\N"    R           
## 18       18     18       18            11     16    12 "\\N"    R           
## 19       19     18       19             8     19    21 "\\N"    R           
## 20       20     18       20             5     15     9 "\\N"    R           
## # ℹ 10 more variables: positionOrder <dbl>, points <dbl>, laps <dbl>,
## #   time <chr>, milliseconds <chr>, fastestLap <chr>, rank <chr>,
## #   fastestLapTime <chr>, fastestLapSpeed <chr>, statusId <dbl>

Information about “Result” dataset

results_summary <- summary(raw_results)
results_summary
##     resultId         raceId          driverId     constructorId   
##  Min.   :    1   Min.   :   1.0   Min.   :  1.0   Min.   :  1.00  
##  1st Qu.: 6461   1st Qu.: 293.0   1st Qu.: 56.0   1st Qu.:  6.00  
##  Median :12920   Median : 514.0   Median :163.0   Median : 25.00  
##  Mean   :12921   Mean   : 531.4   Mean   :261.7   Mean   : 48.63  
##  3rd Qu.:19380   3rd Qu.: 784.0   3rd Qu.:360.0   3rd Qu.: 58.00  
##  Max.   :25845   Max.   :1096.0   Max.   :856.0   Max.   :214.00  
##                                                                   
##      number            grid         position         positionText      
##  Min.   :  0.00   Min.   : 0.00   Length:25840       Length:25840      
##  1st Qu.:  7.00   1st Qu.: 5.00   Class :character   Class :character  
##  Median : 15.00   Median :11.00   Mode  :character   Mode  :character  
##  Mean   : 17.79   Mean   :11.18                                        
##  3rd Qu.: 24.00   3rd Qu.:17.00                                        
##  Max.   :208.00   Max.   :34.00                                        
##  NA's   :6                                                             
##  positionOrder       points            laps            time          
##  Min.   : 1.00   Min.   : 0.000   Min.   :  0.00   Length:25840      
##  1st Qu.: 6.00   1st Qu.: 0.000   1st Qu.: 22.00   Class :character  
##  Median :12.00   Median : 0.000   Median : 52.00   Mode  :character  
##  Mean   :12.88   Mean   : 1.877   Mean   : 45.98                     
##  3rd Qu.:18.00   3rd Qu.: 2.000   3rd Qu.: 66.00                     
##  Max.   :39.00   Max.   :50.000   Max.   :200.00                     
##                                                                      
##  milliseconds        fastestLap            rank           fastestLapTime    
##  Length:25840       Length:25840       Length:25840       Length:25840      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  fastestLapSpeed       statusId     
##  Length:25840       Min.   :  1.00  
##  Class :character   1st Qu.:  1.00  
##  Mode  :character   Median : 11.00  
##                     Mean   : 17.57  
##                     3rd Qu.: 14.00  
##                     Max.   :141.00  
## 

Making copies of datasets above

races        <- raw_races
circuits     <- raw_circuits
drivers      <- raw_drivers
constructors <- raw_constructors
results      <- raw_results

Dropping columns from the datasets which won’t be in use

races <- races[,!names(races) %in% c("url",
                                     "fp1_date","fp1_time",
                                     "fp2_date","fp2_time",
                                     "fp3_date","fp3_time",
                                     "quali_date","quali_time",
                                     "sprint_date","sprint_time",
                                     "time")]

circuits <- circuits[,!names(circuits) %in% c("lat","lng",
                                              "alt","url")]

drivers <- drivers[,!names(drivers) %in% c("driverRef","number",
                                           "code","url")]

constructors <- constructors[,!names(constructors) %in% c("url","constructorRef")]

Combining forename and surname of Drivers

drivers <- unite(drivers, driver_name,
                 c(forename,surname,))

drivers
## # A tibble: 857 × 4
##    driverId driver_name        dob        nationality
##       <dbl> <chr>              <date>     <chr>      
##  1        1 Lewis_Hamilton     1985-01-07 British    
##  2        2 Nick_Heidfeld      1977-05-10 German     
##  3        3 Nico_Rosberg       1985-06-27 German     
##  4        4 Fernando_Alonso    1981-07-29 Spanish    
##  5        5 Heikki_Kovalainen  1981-10-19 Finnish    
##  6        6 Kazuki_Nakajima    1985-01-11 Japanese   
##  7        7 Sébastien_Bourdais 1979-02-28 French     
##  8        8 Kimi_Räikkönen     1979-10-17 Finnish    
##  9        9 Robert_Kubica      1984-12-07 Polish     
## 10       10 Timo_Glock         1982-03-18 German     
## # ℹ 847 more rows

Renaming columns

colnames(races)[5] = "race_name"

view(races)

new_circuits <- circuits %>% 
  rename(circuit_name = name , city = location)
view(new_circuits)



new_constructors <- constructors %>% 
  rename(constructors_name = name)
view(new_constructors)

Merging Data frames

merged_df <- merge(results,raw_status, by="statusId")
merged_df <- merge(merged_df,races, by="raceId")
merged_df <- merge(merged_df,drivers, by="driverId")
merged_df <- merge(merged_df,constructors, by="constructorId")
merged_df <- merge(merged_df,circuits, by="circuitId")

Renaming columns in merged data frame

new_merged_df <- merged_df %>% 
  rename(driver_nationality = nationality.x, constructor_nationality = nationality.y)

Driver counts of different nationalities

describe(new_merged_df)
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf
##                          vars     n     mean      sd  median  trimmed     mad
## circuitId                   1 25840    23.49   18.57    18.0    20.78   16.31
## constructorId               2 25840    48.63   59.73    25.0    36.50   29.65
## driverId                    3 25840   261.73  268.62   163.0   222.17  195.70
## raceId                      4 25840   531.43  299.44   514.0   527.91  360.27
## statusId                    5 25840    17.57   26.16    11.0    10.90   13.34
## resultId                    6 25840 12921.33 7460.68 12920.5 12920.95 9577.60
## number                      7 25834    17.79   15.10    15.0    15.66   11.86
## grid                        8 25840    11.18    7.24    11.0    10.98    8.90
## position*                   9 25840    11.09   12.74     4.0     9.59    4.45
## positionText*              10 25840    25.92   13.76    31.0    27.40   10.38
## positionOrder              11 25840    12.88    7.71    12.0    12.48    8.90
## points                     12 25840     1.88    4.17     0.0     0.80    0.00
## laps                       13 25840    45.98   29.81    52.0    45.90   26.69
## time*                      14 25840   934.69 1835.35     1.0   472.18    0.00
## milliseconds*              15 25840   965.66 1898.59     1.0   486.40    0.00
## fastestLap*                16 25840    12.08   19.75     1.0     7.84    0.00
## rank*                      17 25840     4.73    7.08     1.0     2.91    0.00
## fastestLapTime*            18 25840   961.90 1839.01     1.0   508.77    0.00
## fastestLapSpeed*           19 25840   993.00 1892.78     1.0   528.47    0.00
## status*                    20 25840    53.13   34.80    60.0    51.34   26.69
## year                       21 25840  1990.25   19.35  1990.0  1990.90   22.24
## round                      22 25840     8.39    4.96     8.0     8.16    5.93
## race_name*                 23 25840    22.87   14.29    21.0    21.89   16.31
## date*                      24 25840   540.92  313.10   538.0   540.75  401.78
## driver_name*               25 25840   455.69  233.76   474.0   462.41  277.25
## dob                        26 25840      NaN      NA      NA      NaN      NA
## driver_nationality*        27 25840    17.32   10.24    19.0    16.62   14.83
## name.x*                    28 25840   116.84   62.63   123.0   118.72   72.65
## constructor_nationality*   29 25840     9.84    5.38     6.0     9.32    4.45
## circuitRef*                30 25840    42.98   20.82    43.0    43.86   26.69
## name.y*                    31 25840    34.99   22.22    29.0    34.15   23.72
## location*                  32 25840    42.91   19.60    44.0    43.87   26.69
## country*                   33 25840    16.63   10.16    14.0    16.24   10.38
##                           min   max range  skew kurtosis    se
## circuitId                   1    79    78  1.13     0.49  0.12
## constructorId               1   214   213  1.53     1.13  0.37
## driverId                    1   856   855  1.13    -0.03  1.67
## raceId                      1  1096  1095  0.10    -1.06  1.86
## statusId                    1   141   140  2.19     3.91  0.16
## resultId                    1 25845 25844  0.00    -1.20 46.41
## number                      0   208   208  2.42     9.71  0.09
## grid                        0    34    34  0.19    -0.93  0.05
## position*                   1    34    33  0.82    -1.10  0.08
## positionText*               1    39    38 -0.75    -1.10  0.09
## positionOrder               1    39    38  0.39    -0.50  0.05
## points                      0    50    50  3.15    11.78  0.03
## laps                        0   200   200  0.73     3.66  0.19
## time*                       1  6839  6838  1.85     2.04 11.42
## milliseconds*               1  7051  7050  1.85     2.04 11.81
## fastestLap*                 1    80    79  1.51     0.84  0.12
## rank*                       1    26    25  1.83     1.98  0.04
## fastestLapTime*             1  6772  6771  1.78     1.79 11.44
## fastestLapSpeed*            1  6938  6937  1.77     1.75 11.77
## status*                     1   137   136  0.16    -0.51  0.22
## year                     1950  2022    72 -0.19    -0.92  0.12
## round                       1    22    21  0.34    -0.82  0.03
## race_name*                  1    53    52  0.53    -0.77  0.09
## date*                       1  1079  1078  0.01    -1.20  1.95
## driver_name*                1   855   854 -0.20    -1.04  1.45
## dob                       Inf  -Inf  -Inf    NA       NA    NA
## driver_nationality*         1    42    41  0.42    -0.63  0.06
## name.x*                     1   210   209 -0.20    -1.11  0.39
## constructor_nationality*    1    24    23  0.85     0.17  0.03
## circuitRef*                 1    76    75 -0.22    -1.08  0.13
## name.y*                     1    76    75  0.37    -1.17  0.14
## location*                   1    74    73 -0.28    -0.85  0.12
## country*                    1    34    33  0.43    -1.07  0.06
driver_country <- new_merged_df %>%
  group_by(driver_nationality) %>%
  summarize(driver_counts = n_distinct(driver_name)) %>% 
  ungroup()

Changing column name

#driver_country %>%
 # rename(driver_counts = driver_name)
driver_country
## # A tibble: 42 × 2
##    driver_nationality driver_counts
##    <chr>                      <int>
##  1 American                     157
##  2 American-Italian               1
##  3 Argentine                     24
##  4 Argentine-Italian              1
##  5 Australian                    17
##  6 Austrian                      15
##  7 Belgian                       23
##  8 Brazilian                     32
##  9 British                      165
## 10 Canadian                      14
## # ℹ 32 more rows

Taking top 6 Driver countries and combining every other country in a new row named “OTHERS”

driver_country1 <- driver_country[driver_country$driver_counts >= 30, ]
driver_country1 <- driver_country1[order(-driver_country1$driver_counts), ]

new_row <- c('Others', (sum(driver_country$driver_counts) - sum(driver_country1$driver_counts)))

driver_country1 <- rbind(driver_country1, new_row)

driver_country1
## # A tibble: 7 × 2
##   driver_nationality driver_counts
##   <chr>              <chr>        
## 1 British            165          
## 2 American           157          
## 3 Italian            99           
## 4 French             73           
## 5 German             50           
## 6 Brazilian          32           
## 7 Others             279

3D Pie Chart for number of drivers of different countries entered in this sport

amPieChart(
  data = driver_country1,
  category = "driver_nationality",
  value = "driver_counts",
  threeD = TRUE,
  variableDepth = TRUE)
## Warning: `bindFillRole()` only works on htmltools::tag() objects (e.g., div(),
## p(), etc.), not objects of type 'shiny.tag.list'.

Analysis of Drivers’ Performance

driver_analysis_df <- merged_df %>%
  group_by(year, driver_name) %>%
  summarise(
    total_points = sum(points),
    race_count = n_distinct(raceId),
    avg_positionOrder = mean(positionOrder),
    std_positionOrder = sd(positionOrder)
  ) %>%
  ungroup()
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
head(driver_analysis_df)
## # A tibble: 6 × 6
##    year driver_name  total_points race_count avg_positionOrder std_positionOrder
##   <dbl> <chr>               <dbl>      <int>             <dbl>             <dbl>
## 1  1950 Alberto_Asc…           11          4               8.6              7.77
## 2  1950 Alfredo_Pián            0          1              21               NA   
## 3  1950 Bayliss_Lev…            0          1              27               NA   
## 4  1950 Bill_Cantre…            0          1              27               NA   
## 5  1950 Bill_Holland            6          1               2               NA   
## 6  1950 Bill_Schind…            0          1              26               NA

Sorting and Renaming

colnames(driver_analysis_df) <- gsub(" ", "_",
                                     colnames(driver_analysis_df))

colnames(driver_analysis_df) <- gsub("_$", "", 
                                     colnames(driver_analysis_df))

colnames(driver_analysis_df) <- ifelse(colnames(driver_analysis_df) == "year_", 
                                       "year", colnames(driver_analysis_df))

colnames(driver_analysis_df) <- ifelse(colnames(driver_analysis_df) == "driver_name_",
                                       "driver_name", colnames(driver_analysis_df))

Drivers with most points with every year

champion <- driver_analysis_df %>% 
  group_by(year, driver_name) %>% 
  summarise(points_sum = sum(total_points)) %>% 
  ungroup()
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
champion
## # A tibble: 3,165 × 3
##     year driver_name        points_sum
##    <dbl> <chr>                   <dbl>
##  1  1950 Alberto_Ascari             11
##  2  1950 Alfredo_Pián                0
##  3  1950 Bayliss_Levrett             0
##  4  1950 Bill_Cantrell               0
##  5  1950 Bill_Holland                6
##  6  1950 Bill_Schindler              0
##  7  1950 Bob_Gerard                  0
##  8  1950 Brian_Shawe Taylor          0
##  9  1950 Cecil_Green                 3
## 10  1950 Charles_Pozzi               0
## # ℹ 3,155 more rows

Top 20 drivers year wise in descending order

champion %>% 
  arrange(desc(year)) %>% 
  head(champion,n=20)
## # A tibble: 20 × 3
##     year driver_name      points_sum
##    <dbl> <chr>                 <dbl>
##  1  2022 Alexander_Albon           4
##  2  2022 Carlos_Sainz            228
##  3  2022 Charles_Leclerc         291
##  4  2022 Daniel_Ricciardo         34
##  5  2022 Esteban_Ocon             89
##  6  2022 Fernando_Alonso          81
##  7  2022 George_Russell          262
##  8  2022 Guanyu_Zhou               6
##  9  2022 Kevin_Magnussen          21
## 10  2022 Lance_Stroll             18
## 11  2022 Lando_Norris            116
## 12  2022 Lewis_Hamilton          233
## 13  2022 Max_Verstappen          433
## 14  2022 Mick_Schumacher          12
## 15  2022 Nicholas_Latifi           2
## 16  2022 Nico_Hülkenberg           0
## 17  2022 Nyck_de Vries             2
## 18  2022 Pierre_Gasly             23
## 19  2022 Sebastian_Vettel         37
## 20  2022 Sergio_Pérez            291

Top 10 drivers with most F1 races

most_races <- merged_df %>%
  group_by(driver_name) %>%
  summarise(total_races = n_distinct(raceId)) %>%
  arrange(desc(total_races)) %>%
  head(10) %>%
  rename(total_races = total_races)
most_races
## # A tibble: 10 × 2
##    driver_name        total_races
##    <chr>                    <int>
##  1 Fernando_Alonso            358
##  2 Kimi_Räikkönen             352
##  3 Rubens_Barrichello         326
##  4 Lewis_Hamilton             310
##  5 Jenson_Button              309
##  6 Michael_Schumacher         308
##  7 Sebastian_Vettel           300
##  8 Felipe_Massa               271
##  9 Riccardo_Patrese           257
## 10 Jarno_Trulli               256

Plotting graph for top 10 drivers with most F1 races

ggplot(most_races, aes(x = total_races, y = driver_name, fill = driver_name)) +
  geom_bar(stat = "identity", width = 0.5) +
  labs(title = "Top 10 Drivers with most race entries in Formula 1") +
  coord_flip() +
  scale_x_continuous(expand = c(0, 0)) +
  theme(plot.title = element_text(size = 16),
        axis.text.y = element_text(size = 12),
        axis.text.x = element_text(size = 4),
        axis.title.y = element_blank(),
        axis.title.x = element_text(size = 14)) +
  scale_fill_discrete(guide = FALSE) 
## Warning: The `guide` argument in `scale_*()` cannot be `FALSE`. This was deprecated in
## ggplot2 3.3.4.
## ℹ Please use "none" instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Most number of laps done by the drivers in their career

most_laps <- merged_df %>%
  group_by(driver_name) %>%
  summarise(total_laps = sum(laps)) %>%
  arrange(desc(total_laps)) %>%
  head(10)
most_laps
## # A tibble: 10 × 2
##    driver_name        total_laps
##    <chr>                   <dbl>
##  1 Fernando_Alonso         19153
##  2 Kimi_Räikkönen          18618
##  3 Lewis_Hamilton          17681
##  4 Michael_Schumacher      16824
##  5 Rubens_Barrichello      16642
##  6 Sebastian_Vettel        16426
##  7 Jenson_Button           16274
##  8 Felipe_Massa            14852
##  9 Sergio_Pérez            13012
## 10 Daniel_Ricciardo        12716

Graph for most number of laps done by drivers

ggplot(most_laps, aes(x = total_laps, y = driver_name, fill = driver_name)) +
  geom_bar(stat = "identity", width = 0.5) +
  labs(title = "Top 10 Drivers with most laps in Formula 1") +
  theme_minimal() +
  coord_flip() +
  scale_x_continuous(expand = c(0, 0)) +
  theme(plot.title = element_text(size = 16),
        axis.text.y = element_text(size = 12),
        axis.text.x = element_text(size = 3.8),
        axis.title.y = element_blank(),
        axis.title.x = element_text(size = 14))

Compare Drivers Average points and deviation

driver_analysis_df2 <- merged_df %>%
  group_by(driver_name) %>%
  summarise(raceId_count = n(),
            positionOrder_mean = mean(positionOrder),
            positionOrder_std = sd(positionOrder)) %>%
  ungroup() %>%
  filter(raceId_count > 100) %>%
  arrange(positionOrder_mean) %>%
  head(10)
driver_analysis_df2
## # A tibble: 10 × 4
##    driver_name        raceId_count positionOrder_mean positionOrder_std
##    <chr>                     <int>              <dbl>             <dbl>
##  1 Lewis_Hamilton              310               4.79              5.64
##  2 Max_Verstappen              163               6.53              6.32
##  3 Michael_Schumacher          308               6.88              7.07
##  4 Sebastian_Vettel            300               7.09              6.41
##  5 Alain_Prost                 202               7.50              7.83
##  6 Valtteri_Bottas             201               7.60              5.66
##  7 Charles_Leclerc             103               8.12              6.01
##  8 Denny_Hulme                 112               8.21              6.30
##  9 Nico_Rosberg                206               8.25              6.25
## 10 Ayrton_Senna                162               8.26              8.28

Graph for average points finished vs SD of finishd position

ggplot(driver_analysis_df2, aes(x = positionOrder_mean, y = positionOrder_std, color = driver_name)) +
  geom_point(size = 4) +
  labs(title = 'Average position finished vs Standard Deviation of position finished') +
  theme_minimal() +
  scale_color_discrete(name = "Driver Name") +
  xlab("Average Position Finished") +
  ylab("Standard Deviation of Position Finished") +
  theme(plot.title = element_text(size = 13),
        axis.title = element_text(size = 14),
        axis.text = element_text(size = 12))

Constructor Analysis

Converting “\N” values to NA

merged_df<-merged_df %>% 
  mutate(across(where(is.character), ~na_if(.,"\\N")))

Obtaining number of constructors won by countries

constructor_country <- new_merged_df %>%
  group_by(constructor_nationality) %>%
  summarise(constructor_counts = n_distinct(name.x)) %>%
  ungroup()
constructor_country
## # A tibble: 24 × 2
##    constructor_nationality constructor_counts
##    <chr>                                <int>
##  1 American                                38
##  2 Australian                               1
##  3 Austrian                                 1
##  4 Belgian                                  1
##  5 Brazilian                                1
##  6 British                                 86
##  7 Canadian                                 2
##  8 Dutch                                    3
##  9 East German                              1
## 10 French                                  13
## # ℹ 14 more rows

Countries that won more than 9 constructors

constructor_country1 <- constructor_country %>%
  filter(constructor_counts >= 9) %>%
  arrange(desc(constructor_counts))
constructor_country1
## # A tibble: 5 × 2
##   constructor_nationality constructor_counts
##   <chr>                                <int>
## 1 British                                 86
## 2 American                                38
## 3 Italian                                 29
## 4 French                                  13
## 5 German                                  10

Graph of countries winning more tha 9 constructors

amPieChart(
  data = constructor_country1,
  category = "constructor_nationality",
  value = "constructor_counts",
  threeD = TRUE,
  variableDepth = TRUE)
## Warning: `bindFillRole()` only works on htmltools::tag() objects (e.g., div(),
## p(), etc.), not objects of type 'shiny.tag.list'.

A vector with year, team names with total points, race count, mean position order in constructor and standard deviation of position order

con_analysis_df <- new_merged_df %>%
  group_by(year, name.x) %>%
  summarise(total_points = sum(points),
            race_count = n_distinct(raceId),
            positionOrder_mean = mean(positionOrder),
            positionOrder_std = sd(positionOrder)) %>%
  ungroup()
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
con_analysis_df
## # A tibble: 1,091 × 6
##     year name.x     total_points race_count positionOrder_mean positionOrder_std
##    <dbl> <chr>             <dbl>      <int>              <dbl>             <dbl>
##  1  1950 Adams                 0          1              27                 0   
##  2  1950 Alfa Romeo           89          6               6.45              6.30
##  3  1950 Alta                  0          2              12.7               3.51
##  4  1950 Cooper                0          1              19                NA   
##  5  1950 Deidt                10          1              12                16.5 
##  6  1950 ERA                   0          3              13.2               6.84
##  7  1950 Ewing                 0          1              17                NA   
##  8  1950 Ferrari              21          5               9.47              6.81
##  9  1950 Kurtis Kr…           14          1              15.8              10.6 
## 10  1950 Langley               0          1              16                NA   
## # ℹ 1,081 more rows

Renaming the name column in table

#con_analysis_df <- con_analysis_df %>%
#  rename( constructor_name = name.x)
#con_analysis_df

Top 20 Constructor Champions

#result <- con_champion_df %>% 
#  arrange(desc(year)) %>% 
#  select(year,constructor_name) %>% 
#  head(result,n=20)
#result

Getting race entries by constructors need con_name raceID
grouping con_name every race with 2 entries for 2 cars with team name avg position and sd position for graph same as driver points finishes

con_analysis_df2 <- new_merged_df %>%
  group_by(name.x, raceId) %>%
  summarize(positionOrder_mean = mean(positionOrder),
            positionOrder_std = sd(positionOrder)) %>%
  ungroup()
## `summarise()` has grouped output by 'name.x'. You can override using the
## `.groups` argument.
con_analysis_df2
## # A tibble: 12,568 × 4
##    name.x raceId positionOrder_mean positionOrder_std
##    <chr>   <dbl>              <dbl>             <dbl>
##  1 AFM       814               29.7             3.06 
##  2 AFM       816               14              NA    
##  3 AFM       817               18              NA    
##  4 AFM       822               11.5             3.54 
##  5 AGS       305               18              14.1  
##  6 AGS       306               26.5             2.12 
##  7 AGS       307               27.5             0.707
##  8 AGS       308               26               2.83 
##  9 AGS       309               27.5             0.707
## 10 AGS       310               29               1.41 
## # ℹ 12,558 more rows

New data frame for same

con_analysis_df3 <- con_analysis_df2 %>%
  group_by(name.x) %>%
  summarize(constructor_entry = n(),
            positionOrder_mean = mean(positionOrder_mean),
            positionOrder_std = mean(positionOrder_std)) %>%
  ungroup()
con_analysis_df3
## # A tibble: 210 × 4
##    name.x         constructor_entry positionOrder_mean positionOrder_std
##    <chr>                      <int>              <dbl>             <dbl>
##  1 AFM                            4               18.3             NA   
##  2 AGS                           78               23.5             NA   
##  3 ATS                          116               18.0             NA   
##  4 Adams                          1               27                0   
##  5 Alfa Romeo                   194               13.8             NA   
##  6 AlphaTauri                    61               11.9              4.44
##  7 Alpine F1 Team                44               10.1              3.39
##  8 Alta                           5               13.3             NA   
##  9 Amon                           4               27.4             NA   
## 10 Andrea Moda                    8               30.2             NA   
## # ℹ 200 more rows

Top 20 teams with most race entries

top_constructors <- con_analysis_df3 %>%
  arrange(desc(constructor_entry)) %>%
  head(20)
top_constructors
## # A tibble: 20 × 4
##    name.x      constructor_entry positionOrder_mean positionOrder_std
##    <chr>                   <int>              <dbl>             <dbl>
##  1 Ferrari                  1054               8.34             NA   
##  2 McLaren                   883               9.48             NA   
##  3 Williams                  797              10.9              NA   
##  4 Tyrrell                   433              13.2              NA   
##  5 Renault                   403              11.3              NA   
##  6 Sauber                    395              12.9              NA   
##  7 Team Lotus                395              13.1              NA   
##  8 Red Bull                  348               7.64              4.21
##  9 Minardi                   345              15.9              NA   
## 10 Ligier                    330              13.4              NA   
## 11 Brabham                   308              13.9              NA   
## 12 Arrows                    296              14.3              NA   
## 13 Mercedes                  271               5.76              3.57
## 14 Toro Rosso                268              13.5               3.45
## 15 Benetton                  260              10.2               5.84
## 16 Jordan                    250              13.1               4.75
## 17 Force India               212              11.7               3.33
## 18 BRM                       208              13.1              NA   
## 19 March                     196              16.4              NA   
## 20 Alfa Romeo                194              13.8              NA
ggplot(top_constructors, aes(y = name.x, x = constructor_entry, fill = name.x)) +
  geom_bar(stat = "identity", width = 0.5) +
  labs(title = "Top 20 Constructors with most number of race entries") +
  theme_minimal() +
  coord_flip() +
  theme(plot.title = element_text(size = 13),
        axis.title = element_text(size = 14),
        axis.text = element_text(size = 4))

Filtering and sorting top performing teams in constructors with average position finished and standard deviation of position finished

con_analysis_df3 <- con_analysis_df3[con_analysis_df3$constructor_entry > 200, ]
con_analysis_df3 <- con_analysis_df3 %>%
  arrange(positionOrder_mean) %>%
  head(10)
ggplot(con_analysis_df3, aes(x = positionOrder_mean, y = positionOrder_std, color = name.x)) +
  geom_point(size = 5) +
  labs(title = "Constructor's Average Position Finished vs Standard Deviation of Position Finished") +
  theme_minimal() +
  guides(color = guide_legend(title = "Constructors Name"))
## Warning: Removed 6 rows containing missing values (`geom_point()`).

CIRCUIT ANALYSIS

Making new data frames extracting data from the dataset

fastestlap <- merge(races, circuits, by = 'circuitId')


fastestlap <- merge(fastestlap, results, by = 'raceId')

Replacing “\N” values with NA

fastestlap<-fastestlap %>% 
  mutate(across(where(is.character), ~na_if(.,"\\N")))

Top 20 circuits with most number of races hosted

most_circuits <- fastestlap %>% 
  group_by(year, name) %>%
  summarize(count = n()) %>%
  ungroup()
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
m <- most_circuits %>%
  count(name) %>%
  arrange(desc(n)) %>%
  head(20) %>%
  rename(count = n, circuit_name = name)
m
## # A tibble: 20 × 2
##    circuit_name                   count
##    <chr>                          <int>
##  1 Autodromo Nazionale di Monza      72
##  2 Circuit de Monaco                 68
##  3 Silverstone Circuit               56
##  4 Circuit de Spa-Francorchamps      55
##  5 Circuit Gilles Villeneuve         41
##  6 Nürburgring                       41
##  7 Autódromo José Carlos Pace        39
##  8 Hockenheimring                    37
##  9 Hungaroring                       37
## 10 Red Bull Ring                     34
## 11 Circuit Park Zandvoort            32
## 12 Circuit de Barcelona-Catalunya    32
## 13 Suzuka Circuit                    32
## 14 Autodromo Enzo e Dino Ferrari     30
## 15 Albert Park Grand Prix Circuit    25
## 16 Autódromo Hermanos Rodríguez      22
## 17 Autódromo Juan y Oscar Gálvez     20
## 18 Kyalami                           20
## 19 Watkins Glen                      20
## 20 Indianapolis Motor Speedway       19

BarPlot

ggplot(m, aes(y = circuit_name, x = count), fill = circuit_name) +
  geom_bar(stat = "identity", width = 0.5) +
  labs(title = "Top 20 Circuits with most number of races hosted") +
  theme_minimal()

Races hosted from 2004 to 2022

fastestlap <- fastestlap %>%
  filter(year >= 2004, year <= 2022)
host <- fastestlap %>%
  group_by(year, name) %>%
  summarize(count = n()) %>%
  ungroup()
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
host
## # A tibble: 362 × 3
##     year name                           count
##    <dbl> <chr>                          <int>
##  1  2004 Albert Park Grand Prix Circuit    20
##  2  2004 Autodromo Enzo e Dino Ferrari     20
##  3  2004 Autodromo Nazionale di Monza      20
##  4  2004 Autódromo José Carlos Pace        20
##  5  2004 Bahrain International Circuit     20
##  6  2004 Circuit Gilles Villeneuve         20
##  7  2004 Circuit de Barcelona-Catalunya    20
##  8  2004 Circuit de Monaco                 20
##  9  2004 Circuit de Nevers Magny-Cours     20
## 10  2004 Circuit de Spa-Francorchamps      20
## # ℹ 352 more rows
circuit_name_counts <- table(host$name)
circuit_name_counts
## 
##       Albert Park Grand Prix Circuit        Autodromo Enzo e Dino Ferrari 
##                                   17                                    6 
##         Autódromo Hermanos Rodríguez   Autódromo Internacional do Algarve 
##                                    7                                    2 
## Autodromo Internazionale del Mugello           Autódromo José Carlos Pace 
##                                    1                                   18 
##         Autodromo Nazionale di Monza        Bahrain International Circuit 
##                                   19                                   18 
##                    Baku City Circuit          Buddh International Circuit 
##                                    6                                    3 
##       Circuit de Barcelona-Catalunya                    Circuit de Monaco 
##                                   19                                   18 
##        Circuit de Nevers Magny-Cours         Circuit de Spa-Francorchamps 
##                                    5                                   18 
##            Circuit Gilles Villeneuve              Circuit of the Americas 
##                                   16                                   10 
##               Circuit Park Zandvoort                  Circuit Paul Ricard 
##                                    2                                    4 
##                        Fuji Speedway                       Hockenheimring 
##                                    2                                   10 
##                          Hungaroring          Indianapolis Motor Speedway 
##                                   19                                    4 
##                        Istanbul Park              Jeddah Corniche Circuit 
##                                    9                                    2 
##         Korean International Circuit         Losail International Circuit 
##                                    4                                    1 
##            Marina Bay Street Circuit        Miami International Autodrome 
##                                   13                                    1 
##                          Nürburgring                        Red Bull Ring 
##                                    8                                    9 
##         Sepang International Circuit       Shanghai International Circuit 
##                                   14                                   16 
##                  Silverstone Circuit                       Sochi Autodrom 
##                                   19                                    8 
##                       Suzuka Circuit              Valencia Street Circuit 
##                                   15                                    5 
##                   Yas Marina Circuit 
##                                   14

Top 6 popular circuits from 2004

# Autodromo Nazionale di Monza
lap_time_monza <- fastestlap[fastestlap$name == 'Autodromo Nazionale di Monza', ]

# Circuit de Monaco
lap_time_monaco <- fastestlap[fastestlap$name == 'Circuit de Monaco', ]

# Silverstone Circuit
lap_time_silverstone <- fastestlap[fastestlap$name == 'Silverstone Circuit', ]

# Circuit de Barcelona-Catalunya
lap_time_catalunya <- fastestlap[fastestlap$name == 'Circuit de Barcelona-Catalunya', ]

# Hungaroring
lap_time_hungaroring <- fastestlap[fastestlap$name == 'Hungaroring', ]

# Circuit de Spa-Francorchamps
lap_time_spa <- fastestlap[fastestlap$name == 'Circuit de Spa-Francorchamps', ]

Grouping by year and calculating minimum fastest lap of tracks

# Silverstone
lap <- lap_time_silverstone %>%
  group_by(year) %>%
  summarize(minfastestlap = min(fastestLapTime)) %>%
  ungroup()
lap
## # A tibble: 19 × 2
##     year minfastestlap
##    <dbl> <chr>        
##  1  2004 1:18.739     
##  2  2005 1:20.502     
##  3  2006 <NA>         
##  4  2007 1:20.638     
##  5  2008 <NA>         
##  6  2009 1:20.735     
##  7  2010 1:30.874     
##  8  2011 1:34.908     
##  9  2012 <NA>         
## 10  2013 1:33.401     
## 11  2014 <NA>         
## 12  2015 <NA>         
## 13  2016 1:35.548     
## 14  2017 <NA>         
## 15  2018 <NA>         
## 16  2019 1:27.369     
## 17  2020 <NA>         
## 18  2021 <NA>         
## 19  2022 <NA>
# Monaco
lap1 <- lap_time_monaco %>%
  group_by(year) %>%
  summarize(minfastestlap = min(fastestLapTime)) %>%
  ungroup()
lap1
## # A tibble: 18 × 2
##     year minfastestlap
##    <dbl> <chr>        
##  1  2004 <NA>         
##  2  2005 1:15.842     
##  3  2006 1:15.143     
##  4  2007 <NA>         
##  5  2008 1:16.689     
##  6  2009 1:15.154     
##  7  2010 <NA>         
##  8  2011 <NA>         
##  9  2012 <NA>         
## 10  2013 1:16.577     
## 11  2014 <NA>         
## 12  2015 1:18.063     
## 13  2016 1:17.939     
## 14  2017 1:14.820     
## 15  2018 1:14.260     
## 16  2019 1:14.279     
## 17  2021 <NA>         
## 18  2022 1:14.693
# Monza
lap2 <- lap_time_monza %>%
  group_by(year) %>%
  summarize(minfastestlap = min(fastestLapTime)) %>%
  ungroup()
lap2
## # A tibble: 19 × 2
##     year minfastestlap
##    <dbl> <chr>        
##  1  2004 <NA>         
##  2  2005 1:21.504     
##  3  2006 1:22.559     
##  4  2007 <NA>         
##  5  2008 1:28.047     
##  6  2009 <NA>         
##  7  2010 <NA>         
##  8  2011 <NA>         
##  9  2012 1:27.239     
## 10  2013 <NA>         
## 11  2014 1:28.004     
## 12  2015 <NA>         
## 13  2016 <NA>         
## 14  2017 1:23.361     
## 15  2018 <NA>         
## 16  2019 1:21.779     
## 17  2020 1:22.746     
## 18  2021 <NA>         
## 19  2022 1:24.030
# Catalunya
lap3 <- lap_time_catalunya %>%
  group_by(year) %>%
  summarize(minfastestlap = min(fastestLapTime)) %>%
  ungroup()
lap3
## # A tibble: 19 × 2
##     year minfastestlap
##    <dbl> <chr>        
##  1  2004 1:17.450     
##  2  2005 1:15.641     
##  3  2006 1:16.648     
##  4  2007 <NA>         
##  5  2008 <NA>         
##  6  2009 <NA>         
##  7  2010 <NA>         
##  8  2011 1:26.727     
##  9  2012 1:26.250     
## 10  2013 1:26.217     
## 11  2014 1:28.918     
## 12  2015 1:28.270     
## 13  2016 <NA>         
## 14  2017 <NA>         
## 15  2018 <NA>         
## 16  2019 1:18.492     
## 17  2020 1:18.183     
## 18  2021 1:18.149     
## 19  2022 1:24.108
# Hungaroring
lap4 <- lap_time_hungaroring %>%
  group_by(year) %>%
  summarize(minfastestlap = min(fastestLapTime)) %>%
  ungroup()
lap4
## # A tibble: 19 × 2
##     year minfastestlap
##    <dbl> <chr>        
##  1  2004 1:19.071     
##  2  2005 <NA>         
##  3  2006 <NA>         
##  4  2007 1:20.047     
##  5  2008 1:21.195     
##  6  2009 <NA>         
##  7  2010 <NA>         
##  8  2011 1:23.415     
##  9  2012 1:24.136     
## 10  2013 1:24.069     
## 11  2014 1:25.724     
## 12  2015 1:24.821     
## 13  2016 1:23.086     
## 14  2017 <NA>         
## 15  2018 <NA>         
## 16  2019 1:17.103     
## 17  2020 1:16.627     
## 18  2021 <NA>         
## 19  2022 1:21.386
# Spa
lap5 <- lap_time_spa %>%
  group_by(year) %>%
  summarize(minfastestlap = min(fastestLapTime)) %>%
  ungroup()
lap5
## # A tibble: 18 × 2
##     year minfastestlap
##    <dbl> <chr>        
##  1  2004 <NA>         
##  2  2005 1:51.453     
##  3  2007 <NA>         
##  4  2008 1:47.930     
##  5  2009 <NA>         
##  6  2010 <NA>         
##  7  2011 <NA>         
##  8  2012 <NA>         
##  9  2013 1:50.756     
## 10  2014 <NA>         
## 11  2015 <NA>         
## 12  2016 <NA>         
## 13  2017 <NA>         
## 14  2018 <NA>         
## 15  2019 <NA>         
## 16  2020 <NA>         
## 17  2021 <NA>         
## 18  2022 <NA>

Plotting line graph against for tracks against best time over the years

ggplot() +
  geom_line(data = lap, aes(x = year, y = minfastestlap), color = "blue", group = 1) +
  geom_line(data = lap1, aes(x = year, y = minfastestlap), color = "orange", group = 1) +
  geom_line(data = lap2, aes(x = year, y = minfastestlap), color = "green", group = 1) +
  geom_line(data = lap3, aes(x = year, y = minfastestlap), color = "red", group = 1) +
  geom_line(data = lap4, aes(x = year, y = minfastestlap), color = "purple", group = 1) +
  geom_line(data = lap5, aes(x = year, y = minfastestlap), color = "pink", group = 1) +
  scale_x_continuous(breaks = seq(min(lap$year), max(lap$year), by = 1)) +
  labs(y = "Fastest lap Time", x = "Year", title = "Lap time over the years") +
  theme_minimal() +
  scale_color_manual(values = c("Silverstone"="blue", "Monaco"="orange","Monza"= "green","Catalunya"= "red","Hungaroring"= "purple","Spa"= "pink")) +
  guides(color = guide_legend(title = "Circuits")) +
  theme(legend.position = "right")
## Warning: Removed 10 rows containing missing values (`geom_line()`).
## Warning: Removed 7 rows containing missing values (`geom_line()`).
## Warning: Removed 10 rows containing missing values (`geom_line()`).
## Warning: Removed 7 rows containing missing values (`geom_line()`).
## Removed 7 rows containing missing values (`geom_line()`).
## Warning: Removed 15 rows containing missing values (`geom_line()`).