# excel file
data <- read_excel("../00_data/Salaries.xlsx")
data
## # A tibble: 397 × 6
## rank discipline yrs.since.phd yrs.service sex salary
## <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 Prof B 19 18 Male 139750
## 2 Prof B 20 16 Male 173200
## 3 AsstProf B 4 3 Male 79750
## 4 Prof B 45 39 Male 115000
## 5 Prof B 40 41 Male 141500
## 6 AssocProf B 6 6 Male 97000
## 7 Prof B 30 23 Male 175000
## 8 Prof B 45 45 Male 147765
## 9 Prof B 21 20 Male 119250
## 10 Prof B 18 18 Female 129000
## # ℹ 387 more rows
filter(data, rank == "Prof")
## # A tibble: 266 × 6
## rank discipline yrs.since.phd yrs.service sex salary
## <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 Prof B 19 18 Male 139750
## 2 Prof B 20 16 Male 173200
## 3 Prof B 45 39 Male 115000
## 4 Prof B 40 41 Male 141500
## 5 Prof B 30 23 Male 175000
## 6 Prof B 45 45 Male 147765
## 7 Prof B 21 20 Male 119250
## 8 Prof B 18 18 Female 129000
## 9 Prof B 20 18 Male 104800
## 10 Prof B 12 3 Male 117150
## # ℹ 256 more rows
arrange(data, desc(yrs.since.phd))
## # A tibble: 397 × 6
## rank discipline yrs.since.phd yrs.service sex salary
## <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 Prof A 56 57 Male 76840
## 2 Prof B 56 49 Male 186960
## 3 Prof A 54 49 Male 78162
## 4 Prof A 52 48 Male 107200
## 5 Prof A 51 51 Male 57800
## 6 AssocProf A 49 49 Male 81800
## 7 Prof A 49 43 Male 72300
## 8 Prof B 49 60 Male 192253
## 9 Prof A 49 40 Male 88709
## 10 AssocProf B 48 53 Male 90000
## # ℹ 387 more rows
select(data, rank, yrs.service, sex, salary)
## # A tibble: 397 × 4
## rank yrs.service sex salary
## <chr> <dbl> <chr> <dbl>
## 1 Prof 18 Male 139750
## 2 Prof 16 Male 173200
## 3 AsstProf 3 Male 79750
## 4 Prof 39 Male 115000
## 5 Prof 41 Male 141500
## 6 AssocProf 6 Male 97000
## 7 Prof 23 Male 175000
## 8 Prof 45 Male 147765
## 9 Prof 20 Male 119250
## 10 Prof 18 Female 129000
## # ℹ 387 more rows
select(data, rank, yrs.service, sex, salary, everything())
## # A tibble: 397 × 6
## rank yrs.service sex salary discipline yrs.since.phd
## <chr> <dbl> <chr> <dbl> <chr> <dbl>
## 1 Prof 18 Male 139750 B 19
## 2 Prof 16 Male 173200 B 20
## 3 AsstProf 3 Male 79750 B 4
## 4 Prof 39 Male 115000 B 45
## 5 Prof 41 Male 141500 B 40
## 6 AssocProf 6 Male 97000 B 6
## 7 Prof 23 Male 175000 B 30
## 8 Prof 45 Male 147765 B 45
## 9 Prof 20 Male 119250 B 21
## 10 Prof 18 Female 129000 B 18
## # ℹ 387 more rows
mutate(data,
yrs.btwn.ser.phd = yrs.since.phd - yrs.service) %>%
# select rank, yrs.service, yrs.since.phd, sex, salary, yrs.btwn.ser.phd
select(rank, yrs.service, yrs.since.phd, sex, salary, yrs.btwn.ser.phd)
## # A tibble: 397 × 6
## rank yrs.service yrs.since.phd sex salary yrs.btwn.ser.phd
## <chr> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 Prof 18 19 Male 139750 1
## 2 Prof 16 20 Male 173200 4
## 3 AsstProf 3 4 Male 79750 1
## 4 Prof 39 45 Male 115000 6
## 5 Prof 41 40 Male 141500 -1
## 6 AssocProf 6 6 Male 97000 0
## 7 Prof 23 30 Male 175000 7
## 8 Prof 45 45 Male 147765 0
## 9 Prof 20 21 Male 119250 1
## 10 Prof 18 18 Female 129000 0
## # ℹ 387 more rows
# Just keep years between service and phd
mutate(data,
yrs.btwn.ser.phd = yrs.since.phd - yrs.service) %>%
# select rank, yrs.service, yrs.since.phd, sex, salary, yrs.btwn.ser.phd
select(yrs.btwn.ser.phd)
## # A tibble: 397 × 1
## yrs.btwn.ser.phd
## <dbl>
## 1 1
## 2 4
## 3 1
## 4 6
## 5 -1
## 6 0
## 7 7
## 8 0
## 9 1
## 10 0
## # ℹ 387 more rows
Data to a single row
data
## # A tibble: 397 × 6
## rank discipline yrs.since.phd yrs.service sex salary
## <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 Prof B 19 18 Male 139750
## 2 Prof B 20 16 Male 173200
## 3 AsstProf B 4 3 Male 79750
## 4 Prof B 45 39 Male 115000
## 5 Prof B 40 41 Male 141500
## 6 AssocProf B 6 6 Male 97000
## 7 Prof B 30 23 Male 175000
## 8 Prof B 45 45 Male 147765
## 9 Prof B 21 20 Male 119250
## 10 Prof B 18 18 Female 129000
## # ℹ 387 more rows
# average salary
summarise(data, avg_salary = mean(salary))
## # A tibble: 1 × 1
## avg_salary
## <dbl>
## 1 113706.
By group
data %>%
# Group by gender
group_by(sex) %>%
# Calculate average salary
summarise(avg_salary = mean(salary)) %>%
# Sort it
arrange(avg_salary)
## # A tibble: 2 × 2
## sex avg_salary
## <chr> <dbl>
## 1 Female 101002.
## 2 Male 115090.