Import data

# excel file
data <- read_excel("../00_data/Salaries.xlsx")
data
## # A tibble: 397 × 6
##    rank      discipline yrs.since.phd yrs.service sex    salary
##    <chr>     <chr>              <dbl>       <dbl> <chr>   <dbl>
##  1 Prof      B                     19          18 Male   139750
##  2 Prof      B                     20          16 Male   173200
##  3 AsstProf  B                      4           3 Male    79750
##  4 Prof      B                     45          39 Male   115000
##  5 Prof      B                     40          41 Male   141500
##  6 AssocProf B                      6           6 Male    97000
##  7 Prof      B                     30          23 Male   175000
##  8 Prof      B                     45          45 Male   147765
##  9 Prof      B                     21          20 Male   119250
## 10 Prof      B                     18          18 Female 129000
## # ℹ 387 more rows

Apply the following dplyr verbs to your data

Filter rows

filter(data, rank == "Prof")
## # A tibble: 266 × 6
##    rank  discipline yrs.since.phd yrs.service sex    salary
##    <chr> <chr>              <dbl>       <dbl> <chr>   <dbl>
##  1 Prof  B                     19          18 Male   139750
##  2 Prof  B                     20          16 Male   173200
##  3 Prof  B                     45          39 Male   115000
##  4 Prof  B                     40          41 Male   141500
##  5 Prof  B                     30          23 Male   175000
##  6 Prof  B                     45          45 Male   147765
##  7 Prof  B                     21          20 Male   119250
##  8 Prof  B                     18          18 Female 129000
##  9 Prof  B                     20          18 Male   104800
## 10 Prof  B                     12           3 Male   117150
## # ℹ 256 more rows

Arrange rows

arrange(data, desc(yrs.since.phd))
## # A tibble: 397 × 6
##    rank      discipline yrs.since.phd yrs.service sex   salary
##    <chr>     <chr>              <dbl>       <dbl> <chr>  <dbl>
##  1 Prof      A                     56          57 Male   76840
##  2 Prof      B                     56          49 Male  186960
##  3 Prof      A                     54          49 Male   78162
##  4 Prof      A                     52          48 Male  107200
##  5 Prof      A                     51          51 Male   57800
##  6 AssocProf A                     49          49 Male   81800
##  7 Prof      A                     49          43 Male   72300
##  8 Prof      B                     49          60 Male  192253
##  9 Prof      A                     49          40 Male   88709
## 10 AssocProf B                     48          53 Male   90000
## # ℹ 387 more rows

Select columns

select(data, rank, yrs.service, sex, salary)
## # A tibble: 397 × 4
##    rank      yrs.service sex    salary
##    <chr>           <dbl> <chr>   <dbl>
##  1 Prof               18 Male   139750
##  2 Prof               16 Male   173200
##  3 AsstProf            3 Male    79750
##  4 Prof               39 Male   115000
##  5 Prof               41 Male   141500
##  6 AssocProf           6 Male    97000
##  7 Prof               23 Male   175000
##  8 Prof               45 Male   147765
##  9 Prof               20 Male   119250
## 10 Prof               18 Female 129000
## # ℹ 387 more rows
select(data, rank, yrs.service, sex, salary, everything())
## # A tibble: 397 × 6
##    rank      yrs.service sex    salary discipline yrs.since.phd
##    <chr>           <dbl> <chr>   <dbl> <chr>              <dbl>
##  1 Prof               18 Male   139750 B                     19
##  2 Prof               16 Male   173200 B                     20
##  3 AsstProf            3 Male    79750 B                      4
##  4 Prof               39 Male   115000 B                     45
##  5 Prof               41 Male   141500 B                     40
##  6 AssocProf           6 Male    97000 B                      6
##  7 Prof               23 Male   175000 B                     30
##  8 Prof               45 Male   147765 B                     45
##  9 Prof               20 Male   119250 B                     21
## 10 Prof               18 Female 129000 B                     18
## # ℹ 387 more rows

Add columns

mutate(data,
       yrs.btwn.ser.phd = yrs.since.phd - yrs.service) %>%
    
    # select rank, yrs.service, yrs.since.phd, sex, salary, yrs.btwn.ser.phd
    select(rank, yrs.service, yrs.since.phd, sex, salary, yrs.btwn.ser.phd)
## # A tibble: 397 × 6
##    rank      yrs.service yrs.since.phd sex    salary yrs.btwn.ser.phd
##    <chr>           <dbl>         <dbl> <chr>   <dbl>            <dbl>
##  1 Prof               18            19 Male   139750                1
##  2 Prof               16            20 Male   173200                4
##  3 AsstProf            3             4 Male    79750                1
##  4 Prof               39            45 Male   115000                6
##  5 Prof               41            40 Male   141500               -1
##  6 AssocProf           6             6 Male    97000                0
##  7 Prof               23            30 Male   175000                7
##  8 Prof               45            45 Male   147765                0
##  9 Prof               20            21 Male   119250                1
## 10 Prof               18            18 Female 129000                0
## # ℹ 387 more rows
# Just keep years between service and phd
mutate(data,
       yrs.btwn.ser.phd = yrs.since.phd - yrs.service) %>%
    
    # select rank, yrs.service, yrs.since.phd, sex, salary, yrs.btwn.ser.phd
    select(yrs.btwn.ser.phd)
## # A tibble: 397 × 1
##    yrs.btwn.ser.phd
##               <dbl>
##  1                1
##  2                4
##  3                1
##  4                6
##  5               -1
##  6                0
##  7                7
##  8                0
##  9                1
## 10                0
## # ℹ 387 more rows

Summarize by groups

Data to a single row

data
## # A tibble: 397 × 6
##    rank      discipline yrs.since.phd yrs.service sex    salary
##    <chr>     <chr>              <dbl>       <dbl> <chr>   <dbl>
##  1 Prof      B                     19          18 Male   139750
##  2 Prof      B                     20          16 Male   173200
##  3 AsstProf  B                      4           3 Male    79750
##  4 Prof      B                     45          39 Male   115000
##  5 Prof      B                     40          41 Male   141500
##  6 AssocProf B                      6           6 Male    97000
##  7 Prof      B                     30          23 Male   175000
##  8 Prof      B                     45          45 Male   147765
##  9 Prof      B                     21          20 Male   119250
## 10 Prof      B                     18          18 Female 129000
## # ℹ 387 more rows
# average salary
summarise(data, avg_salary = mean(salary))
## # A tibble: 1 × 1
##   avg_salary
##        <dbl>
## 1    113706.

By group

data %>%
  
    # Group by gender
      group_by(sex) %>%
    
    # Calculate average salary 
    summarise(avg_salary = mean(salary)) %>% 
    
    # Sort it 
    arrange(avg_salary)
## # A tibble: 2 × 2
##   sex    avg_salary
##   <chr>       <dbl>
## 1 Female    101002.
## 2 Male      115090.