# TRM course, Lecture 1
## Task 4: Describe the salary data set
4.1 How many participants/variables are there in the “salary” data set?
library(carData)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
# Copy the `Salaries` data set shipped with carData into a working object.
salary <- carData::Salaries
# dim() reports the number of rows followed by the number of columns.
dim(salary)
## [1] 397 6
4.2 List the variable names:
# Column (variable) names of the salary data frame.
colnames(salary)
## [1] "rank" "discipline" "yrs.since.phd" "yrs.service"
## [5] "sex" "salary"
4.3 List the first 6 observations:
# Show the first six rows (head()'s default row count, made explicit).
head(salary, n = 6)
## rank discipline yrs.since.phd yrs.service sex salary
## 1 Prof B 19 18 Male 139750
## 2 Prof B 20 16 Male 173200
## 3 AsstProf B 4 3 Male 79750
## 4 Prof B 45 39 Male 115000
## 5 Prof B 40 41 Male 141500
## 6 AssocProf B 6 6 Male 97000
library(dplyr)
# Derive three salary-based columns in a single mutate() call.
# Inside mutate(), `salary` refers to the column, not the data frame.
#   salary.level - "High" (>= 130000), "Medium" (100000 to < 130000),
#                  or "Low" (< 100000)
#   high.salary  - 1 when salary >= 130000, 0 when salary < 130000
#   salary.aud   - salary multiplied by 1.53 (presumably a USD-to-AUD
#                  conversion rate -- confirm)
salary <- salary %>%
  mutate(
    # case_when() uses the first matching condition, so "Medium" only needs
    # its lower bound: anything >= 130000 has already matched "High".
    salary.level = case_when(
      salary >= 130000 ~ "High",
      salary >= 100000 ~ "Medium",
      salary < 100000 ~ "Low"
    ),
    high.salary = case_when(
      salary >= 130000 ~ 1,
      salary < 130000 ~ 0
    ),
    salary.aud = salary * 1.53
  )
Task 8: Select the subset of male professors in Discipline A who have a high salary
# Keep rows where sex is "Male", discipline is "A" and salary.level is "High".
# NOTE(review): the task text says "professors" but no `rank` condition is
# applied, so every rank is kept -- confirm this is intended.
Men.A.High <- salary %>%
  filter(sex == "Male" & discipline == "A" & salary.level == "High")
# Rows and columns of the resulting subset.
dim(Men.A.High)
## [1] 40 9
Task 9: Select 4 variables (ID, Rank, Sex, Salary)
# Task 9 asks for four variables (ID, Rank, Sex, Salary), but the data has no
# ID column, so one is derived from the row position before selecting.
var.select2 <- salary %>%
  mutate(ID = row_number()) %>%
  select(ID, rank, sex, salary)
names(var.select2)
## [1] "ID" "rank" "sex" "salary"