Import data

# Load the Excel workbook into a tibble, then print it for inspection
data <- read_excel(path = "../00_data/Data3.xlsx")
data
## # A tibble: 8,474 × 9
##    player_id first_name last_name   birth_date          birth_city birth_country
##        <dbl> <chr>      <chr>       <dttm>              <chr>      <chr>        
##  1   8467867 Bryan      Adams       1977-03-20 00:00:00 Fort St. … CAN          
##  2   8445176 Donald     Audette     1969-09-23 00:00:00 Laval      CAN          
##  3   8460014 Eric       Bertrand    1975-04-16 00:00:00 St-Ephrem  CAN          
##  4   8460510 Jason      Botterill   1976-05-19 00:00:00 Edmonton   CAN          
##  5   8459596 Andrew     Brunette    1973-08-24 00:00:00 Sudbury    CAN          
##  6   8445733 Kelly      Buchberger  1966-12-02 00:00:00 Langenburg CAN          
##  7   8460573 Hnat       Domenichel… 1976-02-17 00:00:00 Edmonton   CAN          
##  8   8459450 Shean      Donovan     1975-01-22 00:00:00 Timmins    CAN          
##  9   8446675 Nelson     Emerson     1967-08-17 00:00:00 Hamilton   CAN          
## 10   8446823 Ray        Ferraro     1964-08-23 00:00:00 Trail      CAN          
## # ℹ 8,464 more rows
## # ℹ 3 more variables: birth_state_province <chr>, birth_year <dbl>,
## #   birth_month <dbl>

Apply the following dplyr verbs to your data

Filter rows

# Keep only rows where birth_country is "CAN" and birth_month is 2;
# comma-separated conditions in filter() are combined with AND
data %>%
  filter(
    birth_country == "CAN",
    birth_month == 2
  )
## # A tibble: 533 × 9
##    player_id first_name last_name   birth_date          birth_city birth_country
##        <dbl> <chr>      <chr>       <dttm>              <chr>      <chr>        
##  1   8460573 Hnat       Domenichel… 1976-02-17 00:00:00 Edmonton   CAN          
##  2   8449754 Gord       Murphy      1967-02-23 00:00:00 Willowdale CAN          
##  3   8451392 Jarrod     Skalde      1971-02-26 00:00:00 Niagara F… CAN          
##  4   8467658 Dan        Snyder      1978-02-23 00:00:00 Kitchener  CAN          
##  5   8459159 Chris      Herperger   1974-02-24 00:00:00 Esterhazy  CAN          
##  6   8467387 Joe        Dipenta     1979-02-25 00:00:00 Barrie     CAN          
##  7   8460649 Serge      Aubin       1975-02-15 00:00:00 Val-d'Or   CAN          
##  8   8462211 Jean-Luc   Grand-Pier… 1977-02-02 00:00:00 Montréal   CAN          
##  9   8467333 Rico       Fata        1980-02-12 00:00:00 Sault Ste… CAN          
## 10   8470601 Braydon    Coburn      1985-02-27 00:00:00 Shaunavon  CAN          
## # ℹ 523 more rows
## # ℹ 3 more variables: birth_state_province <chr>, birth_year <dbl>,
## #   birth_month <dbl>

Arrange rows

# Order rows by birth_date, earliest first (arrange() sorts ascending)
data %>%
  arrange(birth_date)
## # A tibble: 8,474 × 9
##    player_id first_name last_name   birth_date          birth_city birth_country
##        <dbl> <chr>      <chr>       <dttm>              <chr>      <chr>        
##  1   8446662 Harold     Halderson   1900-01-06 00:00:00 Winnipeg   CAN          
##  2   8450099 Herb       Rheaume     1900-01-12 00:00:00 Mason      CAN          
##  3   8449199 Billy      Stuart      1900-02-01 00:00:00 Sackville  CAN          
##  4   8445644 Charlie    Cotch       1900-02-21 00:00:00 Sarnia     CAN          
##  5   8445818 Stan       Crossett    1900-04-18 00:00:00 Tillsonbu… CAN          
##  6   8446613 Fred       Gordon      1900-05-06 00:00:00 Fleming    CAN          
##  7   8447573 Reg        Mackey      1900-05-07 00:00:00 Ottawa     CAN          
##  8   8444855 Clarence   Abel        1900-05-28 00:00:00 Sault Ste… USA          
##  9   8447741 Duke       McCurry     1900-06-13 00:00:00 Toronto    CAN          
## 10   8448237 Leo        Quenneville 1900-06-15 00:00:00 St-Anicet  CAN          
## # ℹ 8,464 more rows
## # ℹ 3 more variables: birth_state_province <chr>, birth_year <dbl>,
## #   birth_month <dbl>

Select columns

# Keep the contiguous run of columns from birth_country through
# birth_state_province, dropping everything else
data %>%
  select(birth_country:birth_state_province)
## # A tibble: 8,474 × 2
##    birth_country birth_state_province
##    <chr>         <chr>               
##  1 CAN           British Columbia    
##  2 CAN           Quebec              
##  3 CAN           Quebec              
##  4 CAN           Alberta             
##  5 CAN           Ontario             
##  6 CAN           Saskatchewan        
##  7 CAN           Alberta             
##  8 CAN           Ontario             
##  9 CAN           Ontario             
## 10 CAN           British Columbia    
## # ℹ 8,464 more rows

Add columns

Summarize by groups

# Average birth month over the whole table. The summary column is named
# explicitly so the result does not carry the backticked `mean(birth_month)`
# header, and na.rm = TRUE keeps a missing month from turning the mean into NA.
# NOTE(review): the heading says "by groups" but nothing is grouped here; if
# per-group means are intended, add e.g. `.by = birth_country` (dplyr >= 1.1)
# or a group_by() step and re-render the output — confirm.
summarise(data, mean_birth_month = mean(birth_month, na.rm = TRUE))
## # A tibble: 1 × 1
##   mean_birth_month
##              <dbl>
## 1             5.99