Import data

# Load the CSV from the shared data folder into a tibble
data <- read_csv(file = "../00_data/myData.csv")

# Preview the imported tibble
data
## # A tibble: 882 × 69
##    EXPID     PEAKID  YEAR SEASON SEASON_FACTOR  HOST HOST_FACTOR ROUTE1   ROUTE2
##    <chr>     <chr>  <dbl>  <dbl> <chr>         <dbl> <chr>       <chr>    <chr> 
##  1 EVER20101 EVER    2020      1 Spring            2 China       N Col-N… <NA>  
##  2 EVER20102 EVER    2020      1 Spring            2 China       N Col-N… <NA>  
##  3 EVER20103 EVER    2020      1 Spring            2 China       N Col-N… <NA>  
##  4 AMAD20301 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
##  5 AMAD20302 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
##  6 AMAD20303 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
##  7 AMAD20304 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
##  8 AMAD20305 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
##  9 AMAD20306 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
## 10 AMAD20307 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
## # ℹ 872 more rows
## # ℹ 60 more variables: ROUTE3 <lgl>, ROUTE4 <lgl>, NATION <chr>, LEADERS <chr>,
## #   SPONSOR <chr>, SUCCESS1 <lgl>, SUCCESS2 <lgl>, SUCCESS3 <lgl>,
## #   SUCCESS4 <lgl>, ASCENT1 <chr>, ASCENT2 <chr>, ASCENT3 <lgl>, ASCENT4 <lgl>,
## #   CLAIMED <lgl>, DISPUTED <lgl>, COUNTRIES <chr>, APPROACH <chr>,
## #   BCDATE <date>, SMTDATE <date>, SMTTIME <chr>, SMTDAYS <dbl>, TOTDAYS <dbl>,
## #   TERMDATE <date>, TERMREASON <dbl>, TERMREASON_FACTOR <chr>, …

Apply the following dplyr verbs to your data

Filter rows

# Keep only the rows whose HOST_FACTOR is "China"
data %>%
    filter(HOST_FACTOR == "China")
## # A tibble: 25 × 69
##    EXPID     PEAKID  YEAR SEASON SEASON_FACTOR  HOST HOST_FACTOR ROUTE1   ROUTE2
##    <chr>     <chr>  <dbl>  <dbl> <chr>         <dbl> <chr>       <chr>    <chr> 
##  1 EVER20101 EVER    2020      1 Spring            2 China       N Col-N… <NA>  
##  2 EVER20102 EVER    2020      1 Spring            2 China       N Col-N… <NA>  
##  3 EVER20103 EVER    2020      1 Spring            2 China       N Col-N… <NA>  
##  4 CHOY20301 CHOY    2020      3 Autumn            2 China       NW side  <NA>  
##  5 EVER21151 EVER    2021      1 Spring            2 China       N Col-N… <NA>  
##  6 EVER22141 EVER    2022      1 Spring            2 China       N Col-N… <NA>  
##  7 EVER22142 EVER    2022      1 Spring            2 China       N Col-N… <NA>  
##  8 CHOY22301 CHOY    2022      3 Autumn            2 China       SW Ridge <NA>  
##  9 CHOY22302 CHOY    2022      3 Autumn            2 China       SW Ridge <NA>  
## 10 CHOY23101 CHOY    2023      1 Spring            2 China       NW side  <NA>  
## # ℹ 15 more rows
## # ℹ 60 more variables: ROUTE3 <lgl>, ROUTE4 <lgl>, NATION <chr>, LEADERS <chr>,
## #   SPONSOR <chr>, SUCCESS1 <lgl>, SUCCESS2 <lgl>, SUCCESS3 <lgl>,
## #   SUCCESS4 <lgl>, ASCENT1 <chr>, ASCENT2 <chr>, ASCENT3 <lgl>, ASCENT4 <lgl>,
## #   CLAIMED <lgl>, DISPUTED <lgl>, COUNTRIES <chr>, APPROACH <chr>,
## #   BCDATE <date>, SMTDATE <date>, SMTTIME <chr>, SMTDAYS <dbl>, TOTDAYS <dbl>,
## #   TERMDATE <date>, TERMREASON <dbl>, TERMREASON_FACTOR <chr>, …

Arrange rows

# Sort rows by the numeric HOST code, largest first
data %>%
    arrange(desc(HOST))
## # A tibble: 882 × 69
##    EXPID     PEAKID  YEAR SEASON SEASON_FACTOR  HOST HOST_FACTOR ROUTE1   ROUTE2
##    <chr>     <chr>  <dbl>  <dbl> <chr>         <dbl> <chr>       <chr>    <chr> 
##  1 KIRA22201 KIRA    2022      2 Summer            3 India       N Face   <NA>  
##  2 LNPS22301 LNPS    2022      3 Autumn            3 India       S Face-… <NA>  
##  3 EVER20101 EVER    2020      1 Spring            2 China       N Col-N… <NA>  
##  4 EVER20102 EVER    2020      1 Spring            2 China       N Col-N… <NA>  
##  5 EVER20103 EVER    2020      1 Spring            2 China       N Col-N… <NA>  
##  6 CHOY20301 CHOY    2020      3 Autumn            2 China       NW side  <NA>  
##  7 EVER21151 EVER    2021      1 Spring            2 China       N Col-N… <NA>  
##  8 EVER22141 EVER    2022      1 Spring            2 China       N Col-N… <NA>  
##  9 EVER22142 EVER    2022      1 Spring            2 China       N Col-N… <NA>  
## 10 CHOY22301 CHOY    2022      3 Autumn            2 China       SW Ridge <NA>  
## # ℹ 872 more rows
## # ℹ 60 more variables: ROUTE3 <lgl>, ROUTE4 <lgl>, NATION <chr>, LEADERS <chr>,
## #   SPONSOR <chr>, SUCCESS1 <lgl>, SUCCESS2 <lgl>, SUCCESS3 <lgl>,
## #   SUCCESS4 <lgl>, ASCENT1 <chr>, ASCENT2 <chr>, ASCENT3 <lgl>, ASCENT4 <lgl>,
## #   CLAIMED <lgl>, DISPUTED <lgl>, COUNTRIES <chr>, APPROACH <chr>,
## #   BCDATE <date>, SMTDATE <date>, SMTTIME <chr>, SMTDAYS <dbl>, TOTDAYS <dbl>,
## #   TERMDATE <date>, TERMREASON <dbl>, TERMREASON_FACTOR <chr>, …

Select columns

# Keep the contiguous run of columns from SEASON through HOST
data %>%
    select(SEASON:HOST)
## # A tibble: 882 × 3
##    SEASON SEASON_FACTOR  HOST
##     <dbl> <chr>         <dbl>
##  1      1 Spring            2
##  2      1 Spring            2
##  3      1 Spring            2
##  4      3 Autumn            1
##  5      3 Autumn            1
##  6      3 Autumn            1
##  7      3 Autumn            1
##  8      3 Autumn            1
##  9      3 Autumn            1
## 10      3 Autumn            1
## # ℹ 872 more rows

Add columns

# Add a genuinely new column. `data` already contains TOTMEMBERS (the original
# result stayed at 69 columns), so assigning to that name overwrote the
# existing column instead of adding one.
data %>%
    mutate(SMT_MINUS_DEATHS = SMTMEMBERS - MDEATHS,
           # Put the new column first so it is visible in the printed preview
           .before = 1)
# NOTE: re-knit the document to refresh the printed output below.
## # A tibble: 882 × 69
##    EXPID     PEAKID  YEAR SEASON SEASON_FACTOR  HOST HOST_FACTOR ROUTE1   ROUTE2
##    <chr>     <chr>  <dbl>  <dbl> <chr>         <dbl> <chr>       <chr>    <chr> 
##  1 EVER20101 EVER    2020      1 Spring            2 China       N Col-N… <NA>  
##  2 EVER20102 EVER    2020      1 Spring            2 China       N Col-N… <NA>  
##  3 EVER20103 EVER    2020      1 Spring            2 China       N Col-N… <NA>  
##  4 AMAD20301 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
##  5 AMAD20302 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
##  6 AMAD20303 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
##  7 AMAD20304 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
##  8 AMAD20305 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
##  9 AMAD20306 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
## 10 AMAD20307 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
## # ℹ 872 more rows
## # ℹ 60 more variables: ROUTE3 <lgl>, ROUTE4 <lgl>, NATION <chr>, LEADERS <chr>,
## #   SPONSOR <chr>, SUCCESS1 <lgl>, SUCCESS2 <lgl>, SUCCESS3 <lgl>,
## #   SUCCESS4 <lgl>, ASCENT1 <chr>, ASCENT2 <chr>, ASCENT3 <lgl>, ASCENT4 <lgl>,
## #   CLAIMED <lgl>, DISPUTED <lgl>, COUNTRIES <chr>, APPROACH <chr>,
## #   BCDATE <date>, SMTDATE <date>, SMTTIME <chr>, SMTDAYS <dbl>, TOTDAYS <dbl>,
## #   TERMDATE <date>, TERMREASON <dbl>, TERMREASON_FACTOR <chr>, …

Summarize by groups

data %>%

    # Remove missing values
    filter(!is.na(MDEATHS)) %>%

    # Group by host country
    group_by(HOST_FACTOR) %>%

    # Collapse each group to one row of summary statistics
    summarise(n_expeditions = n(),
              total_deaths  = sum(MDEATHS),
              avg_deaths    = mean(MDEATHS))
# NOTE: re-knit the document to refresh the printed output below.
## # A tibble: 882 × 69
##    EXPID     PEAKID  YEAR SEASON SEASON_FACTOR  HOST HOST_FACTOR ROUTE1   ROUTE2
##    <chr>     <chr>  <dbl>  <dbl> <chr>         <dbl> <chr>       <chr>    <chr> 
##  1 EVER20101 EVER    2020      1 Spring            2 China       N Col-N… <NA>  
##  2 EVER20102 EVER    2020      1 Spring            2 China       N Col-N… <NA>  
##  3 EVER20103 EVER    2020      1 Spring            2 China       N Col-N… <NA>  
##  4 AMAD20301 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
##  5 AMAD20302 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
##  6 AMAD20303 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
##  7 AMAD20304 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
##  8 AMAD20305 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
##  9 AMAD20306 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
## 10 AMAD20307 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
## # ℹ 872 more rows
## # ℹ 60 more variables: ROUTE3 <lgl>, ROUTE4 <lgl>, NATION <chr>, LEADERS <chr>,
## #   SPONSOR <chr>, SUCCESS1 <lgl>, SUCCESS2 <lgl>, SUCCESS3 <lgl>,
## #   SUCCESS4 <lgl>, ASCENT1 <chr>, ASCENT2 <chr>, ASCENT3 <lgl>, ASCENT4 <lgl>,
## #   CLAIMED <lgl>, DISPUTED <lgl>, COUNTRIES <chr>, APPROACH <chr>,
## #   BCDATE <date>, SMTDATE <date>, SMTTIME <chr>, SMTDAYS <dbl>, TOTDAYS <dbl>,
## #   TERMDATE <date>, TERMREASON <dbl>, TERMREASON_FACTOR <chr>, …