Import data

# excel file
data <- read_excel("../00_data/myData.xlsx")
data
## # A tibble: 20 × 4
##    raceId fastestLapTime driverId  laps
##     <dbl> <chr>             <dbl> <dbl>
##  1   1056 "1:14.621"          832    78
##  2   1056 "1:14.649"          830    78
##  3   1056 "1:14.670"          846    78
##  4   1056 "1:14.552"          815    78
##  5   1056 "1:15.316"           20    78
##  6   1056 "1:15.412"          842    78
##  7   1056 "1:12.909"            1    78
##  8   1056 "1:14.674"          840    77
##  9   1056 "1:15.316"          839    77
## 10   1056 "1:15.331"          841    77
## 11   1056 "1:14.971"            8    77
## 12   1056 "1:14.578"          817    77
## 13   1056 "1:15.026"            4    77
## 14   1056 "1:15.539"          847    77
## 15   1056 "1:15.573"          849    77
## 16   1056 "1:14.037"          852    77
## 17   1056 "1:16.866"          853    75
## 18   1056 "1:16.425"          854    75
## 19   1056 "1:15.706"          822    29
## 20   1056 "\\N"               844     0

Apply the following dplyr verbs to your data

Filter rows

filter(data, raceId == 1056, driverId == 1)
## # A tibble: 1 × 4
##   raceId fastestLapTime driverId  laps
##    <dbl> <chr>             <dbl> <dbl>
## 1   1056 1:12.909              1    78
filter(data, raceId == 1056, driverId == 4)
## # A tibble: 1 × 4
##   raceId fastestLapTime driverId  laps
##    <dbl> <chr>             <dbl> <dbl>
## 1   1056 1:15.026              4    77

Arrange rows

arrange(data, desc(fastestLapTime))
## # A tibble: 20 × 4
##    raceId fastestLapTime driverId  laps
##     <dbl> <chr>             <dbl> <dbl>
##  1   1056 "1:16.866"          853    75
##  2   1056 "1:16.425"          854    75
##  3   1056 "1:15.706"          822    29
##  4   1056 "1:15.573"          849    77
##  5   1056 "1:15.539"          847    77
##  6   1056 "1:15.412"          842    78
##  7   1056 "1:15.331"          841    77
##  8   1056 "1:15.316"           20    78
##  9   1056 "1:15.316"          839    77
## 10   1056 "1:15.026"            4    77
## 11   1056 "1:14.971"            8    77
## 12   1056 "1:14.674"          840    77
## 13   1056 "1:14.670"          846    78
## 14   1056 "1:14.649"          830    78
## 15   1056 "1:14.621"          832    78
## 16   1056 "1:14.578"          817    77
## 17   1056 "1:14.552"          815    78
## 18   1056 "1:14.037"          852    77
## 19   1056 "1:12.909"            1    78
## 20   1056 "\\N"               844     0

Select columns

select(data, raceId, fastestLapTime, driverId, laps)
## # A tibble: 20 × 4
##    raceId fastestLapTime driverId  laps
##     <dbl> <chr>             <dbl> <dbl>
##  1   1056 "1:14.621"          832    78
##  2   1056 "1:14.649"          830    78
##  3   1056 "1:14.670"          846    78
##  4   1056 "1:14.552"          815    78
##  5   1056 "1:15.316"           20    78
##  6   1056 "1:15.412"          842    78
##  7   1056 "1:12.909"            1    78
##  8   1056 "1:14.674"          840    77
##  9   1056 "1:15.316"          839    77
## 10   1056 "1:15.331"          841    77
## 11   1056 "1:14.971"            8    77
## 12   1056 "1:14.578"          817    77
## 13   1056 "1:15.026"            4    77
## 14   1056 "1:15.539"          847    77
## 15   1056 "1:15.573"          849    77
## 16   1056 "1:14.037"          852    77
## 17   1056 "1:16.866"          853    75
## 18   1056 "1:16.425"          854    75
## 19   1056 "1:15.706"          822    29
## 20   1056 "\\N"               844     0
select(data, fastestLapTime, laps)
## # A tibble: 20 × 2
##    fastestLapTime  laps
##    <chr>          <dbl>
##  1 "1:14.621"        78
##  2 "1:14.649"        78
##  3 "1:14.670"        78
##  4 "1:14.552"        78
##  5 "1:15.316"        78
##  6 "1:15.412"        78
##  7 "1:12.909"        78
##  8 "1:14.674"        77
##  9 "1:15.316"        77
## 10 "1:15.331"        77
## 11 "1:14.971"        77
## 12 "1:14.578"        77
## 13 "1:15.026"        77
## 14 "1:15.539"        77
## 15 "1:15.573"        77
## 16 "1:14.037"        77
## 17 "1:16.866"        75
## 18 "1:16.425"        75
## 19 "1:15.706"        29
## 20 "\\N"              0

Add columns

mutate(data, results = asc(fastestLapTime))

Summarize by groups

data %>%
    
    # Group by number of laps completed
    group_by(laps) %>% 
    
    # Sort it
    arrange(laps)
## # A tibble: 20 × 4
## # Groups:   laps [5]
##    raceId fastestLapTime driverId  laps
##     <dbl> <chr>             <dbl> <dbl>
##  1   1056 "\\N"               844     0
##  2   1056 "1:15.706"          822    29
##  3   1056 "1:16.866"          853    75
##  4   1056 "1:16.425"          854    75
##  5   1056 "1:14.674"          840    77
##  6   1056 "1:15.316"          839    77
##  7   1056 "1:15.331"          841    77
##  8   1056 "1:14.971"            8    77
##  9   1056 "1:14.578"          817    77
## 10   1056 "1:15.026"            4    77
## 11   1056 "1:15.539"          847    77
## 12   1056 "1:15.573"          849    77
## 13   1056 "1:14.037"          852    77
## 14   1056 "1:14.621"          832    78
## 15   1056 "1:14.649"          830    78
## 16   1056 "1:14.670"          846    78
## 17   1056 "1:14.552"          815    78
## 18   1056 "1:15.316"           20    78
## 19   1056 "1:15.412"          842    78
## 20   1056 "1:12.909"            1    78