Import data
# csv file
data <- read_csv("../00_data/Salaries.csv")
data
## # A tibble: 397 × 6
## rank discipline yrs.since.phd yrs.service sex salary
## <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 Prof B 19 18 Male 139750
## 2 Prof B 20 16 Male 173200
## 3 AsstProf B 4 3 Male 79750
## 4 Prof B 45 39 Male 115000
## 5 Prof B 40 41 Male 141500
## 6 AssocProf B 6 6 Male 97000
## 7 Prof B 30 23 Male 175000
## 8 Prof B 45 45 Male 147765
## 9 Prof B 21 20 Male 119250
## 10 Prof B 18 18 Female 129000
## # ℹ 387 more rows
# excel file
# data <- read_excel("../00_data/Salaries.xlsx")
# data
Apply the following dplyr verbs to your data
Filter rows
filter(data, rank == "Prof", salary >= 10000)
## # A tibble: 266 × 6
## rank discipline yrs.since.phd yrs.service sex salary
## <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 Prof B 19 18 Male 139750
## 2 Prof B 20 16 Male 173200
## 3 Prof B 45 39 Male 115000
## 4 Prof B 40 41 Male 141500
## 5 Prof B 30 23 Male 175000
## 6 Prof B 45 45 Male 147765
## 7 Prof B 21 20 Male 119250
## 8 Prof B 18 18 Female 129000
## 9 Prof B 20 18 Male 104800
## 10 Prof B 12 3 Male 117150
## # ℹ 256 more rows
filter(data, rank == "Prof" | rank == "AsstProf")
## # A tibble: 333 × 6
## rank discipline yrs.since.phd yrs.service sex salary
## <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 Prof B 19 18 Male 139750
## 2 Prof B 20 16 Male 173200
## 3 AsstProf B 4 3 Male 79750
## 4 Prof B 45 39 Male 115000
## 5 Prof B 40 41 Male 141500
## 6 Prof B 30 23 Male 175000
## 7 Prof B 45 45 Male 147765
## 8 Prof B 21 20 Male 119250
## 9 Prof B 18 18 Female 129000
## 10 AsstProf B 7 2 Male 79800
## # ℹ 323 more rows
Arrange rows
arrange(data, desc(yrs.since.phd))
## # A tibble: 397 × 6
## rank discipline yrs.since.phd yrs.service sex salary
## <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 Prof A 56 57 Male 76840
## 2 Prof B 56 49 Male 186960
## 3 Prof A 54 49 Male 78162
## 4 Prof A 52 48 Male 107200
## 5 Prof A 51 51 Male 57800
## 6 AssocProf A 49 49 Male 81800
## 7 Prof A 49 43 Male 72300
## 8 Prof B 49 60 Male 192253
## 9 Prof A 49 40 Male 88709
## 10 AssocProf B 48 53 Male 90000
## # ℹ 387 more rows
Select columns
select(data, sex)
## # A tibble: 397 × 1
## sex
## <chr>
## 1 Male
## 2 Male
## 3 Male
## 4 Male
## 5 Male
## 6 Male
## 7 Male
## 8 Male
## 9 Male
## 10 Female
## # ℹ 387 more rows
Add columns
mutate(data,"years_phd-service" = yrs.since.phd - yrs.service)
## # A tibble: 397 × 7
## rank discipline yrs.since.phd yrs.service sex salary `years_phd-service`
## <chr> <chr> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 Prof B 19 18 Male 139750 1
## 2 Prof B 20 16 Male 173200 4
## 3 AsstPr… B 4 3 Male 79750 1
## 4 Prof B 45 39 Male 115000 6
## 5 Prof B 40 41 Male 141500 -1
## 6 AssocP… B 6 6 Male 97000 0
## 7 Prof B 30 23 Male 175000 7
## 8 Prof B 45 45 Male 147765 0
## 9 Prof B 21 20 Male 119250 1
## 10 Prof B 18 18 Fema… 129000 0
## # ℹ 387 more rows
Summarize by groups
data
## # A tibble: 397 × 6
## rank discipline yrs.since.phd yrs.service sex salary
## <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 Prof B 19 18 Male 139750
## 2 Prof B 20 16 Male 173200
## 3 AsstProf B 4 3 Male 79750
## 4 Prof B 45 39 Male 115000
## 5 Prof B 40 41 Male 141500
## 6 AssocProf B 6 6 Male 97000
## 7 Prof B 30 23 Male 175000
## 8 Prof B 45 45 Male 147765
## 9 Prof B 21 20 Male 119250
## 10 Prof B 18 18 Female 129000
## # ℹ 387 more rows
#avg years_phd-service
data |>
filter(sex == "Male") |>
summarise(`years_phd-service` = mean(yrs.since.phd - yrs.service))
## # A tibble: 1 × 1
## `years_phd-service`
## <dbl>
## 1 4.67