Import data
# Load the Excel workbook into a tibble, then print it
data <- readxl::read_excel(path = "../00_data/Data3.xlsx")
data
## # A tibble: 8,474 × 9
## player_id first_name last_name birth_date birth_city birth_country
## <dbl> <chr> <chr> <dttm> <chr> <chr>
## 1 8467867 Bryan Adams 1977-03-20 00:00:00 Fort St. … CAN
## 2 8445176 Donald Audette 1969-09-23 00:00:00 Laval CAN
## 3 8460014 Eric Bertrand 1975-04-16 00:00:00 St-Ephrem CAN
## 4 8460510 Jason Botterill 1976-05-19 00:00:00 Edmonton CAN
## 5 8459596 Andrew Brunette 1973-08-24 00:00:00 Sudbury CAN
## 6 8445733 Kelly Buchberger 1966-12-02 00:00:00 Langenburg CAN
## 7 8460573 Hnat Domenichel… 1976-02-17 00:00:00 Edmonton CAN
## 8 8459450 Shean Donovan 1975-01-22 00:00:00 Timmins CAN
## 9 8446675 Nelson Emerson 1967-08-17 00:00:00 Hamilton CAN
## 10 8446823 Ray Ferraro 1964-08-23 00:00:00 Trail CAN
## # ℹ 8,464 more rows
## # ℹ 3 more variables: birth_state_province <chr>, birth_year <dbl>,
## # birth_month <dbl>
Apply the following dplyr verbs to your data
Filter rows
# Keep rows where birth_country is "CAN" and birth_month is 2 (both must hold)
data %>%
  filter(birth_country == "CAN" & birth_month == 2)
## # A tibble: 533 × 9
## player_id first_name last_name birth_date birth_city birth_country
## <dbl> <chr> <chr> <dttm> <chr> <chr>
## 1 8460573 Hnat Domenichel… 1976-02-17 00:00:00 Edmonton CAN
## 2 8449754 Gord Murphy 1967-02-23 00:00:00 Willowdale CAN
## 3 8451392 Jarrod Skalde 1971-02-26 00:00:00 Niagara F… CAN
## 4 8467658 Dan Snyder 1978-02-23 00:00:00 Kitchener CAN
## 5 8459159 Chris Herperger 1974-02-24 00:00:00 Esterhazy CAN
## 6 8467387 Joe Dipenta 1979-02-25 00:00:00 Barrie CAN
## 7 8460649 Serge Aubin 1975-02-15 00:00:00 Val-d'Or CAN
## 8 8462211 Jean-Luc Grand-Pier… 1977-02-02 00:00:00 Montréal CAN
## 9 8467333 Rico Fata 1980-02-12 00:00:00 Sault Ste… CAN
## 10 8470601 Braydon Coburn 1985-02-27 00:00:00 Shaunavon CAN
## # ℹ 523 more rows
## # ℹ 3 more variables: birth_state_province <chr>, birth_year <dbl>,
## # birth_month <dbl>
Arrange rows
# Order rows by birth_date, earliest first (arrange() sorts ascending by default)
data %>%
  arrange(birth_date)
## # A tibble: 8,474 × 9
## player_id first_name last_name birth_date birth_city birth_country
## <dbl> <chr> <chr> <dttm> <chr> <chr>
## 1 8446662 Harold Halderson 1900-01-06 00:00:00 Winnipeg CAN
## 2 8450099 Herb Rheaume 1900-01-12 00:00:00 Mason CAN
## 3 8449199 Billy Stuart 1900-02-01 00:00:00 Sackville CAN
## 4 8445644 Charlie Cotch 1900-02-21 00:00:00 Sarnia CAN
## 5 8445818 Stan Crossett 1900-04-18 00:00:00 Tillsonbu… CAN
## 6 8446613 Fred Gordon 1900-05-06 00:00:00 Fleming CAN
## 7 8447573 Reg Mackey 1900-05-07 00:00:00 Ottawa CAN
## 8 8444855 Clarence Abel 1900-05-28 00:00:00 Sault Ste… USA
## 9 8447741 Duke McCurry 1900-06-13 00:00:00 Toronto CAN
## 10 8448237 Leo Quenneville 1900-06-15 00:00:00 St-Anicet CAN
## # ℹ 8,464 more rows
## # ℹ 3 more variables: birth_state_province <chr>, birth_year <dbl>,
## # birth_month <dbl>
Select columns
# Keep the run of adjacent columns from birth_country to birth_state_province
data %>%
  select(birth_country:birth_state_province)
## # A tibble: 8,474 × 2
## birth_country birth_state_province
## <chr> <chr>
## 1 CAN British Columbia
## 2 CAN Quebec
## 3 CAN Quebec
## 4 CAN Alberta
## 5 CAN Ontario
## 6 CAN Saskatchewan
## 7 CAN Alberta
## 8 CAN Ontario
## 9 CAN Ontario
## 10 CAN British Columbia
## # ℹ 8,464 more rows
Add columns
Summarize by groups
# Mean of birth_month over every row of the data.
# NOTE(review): no group_by() / .by is applied, so this returns a single
# overall value rather than one row per group, although the section heading
# says "by groups" -- confirm whether a grouping variable was intended.
data %>%
  summarise(mean(birth_month))
## # A tibble: 1 × 1
## `mean(birth_month)`
## <dbl>
## 1 5.99