# TRM-course Lecture 1

## Task 4: Describe the salary data set:

4.1 How many participants/variables are there in the “salary” data set?

library(carData)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
# Bind carData's Salaries data set to the working name `salary`.
salary <- carData::Salaries
# Report the size as (rows, columns): observations first, then variables.
c(nrow(salary), ncol(salary))
## [1] 397   6

4.2 List the variable names:

# Column (variable) names of the data frame.
colnames(salary)
## [1] "rank"          "discipline"    "yrs.since.phd" "yrs.service"  
## [5] "sex"           "salary"

4.3 List the first 6 observations:

# Show the first six rows of the data frame.
head(salary, n = 6L)
##        rank discipline yrs.since.phd yrs.service  sex salary
## 1      Prof          B            19          18 Male 139750
## 2      Prof          B            20          16 Male 173200
## 3  AsstProf          B             4           3 Male  79750
## 4      Prof          B            45          39 Male 115000
## 5      Prof          B            40          41 Male 141500
## 6 AssocProf          B             6           6 Male  97000

Task 5: Create a new categorical variable “salary.level” with 3 levels (Low, Medium, High)

library(dplyr)
# Bin salary into a three-level categorical variable:
#   Low    : salary <  100000
#   Medium : 100000 <= salary < 130000
#   High   : salary >= 130000
# case_when() checks its conditions in order, so the "Medium" branch only needs
# its lower bound. The result is stored as a factor with an explicit level
# order (Low, Medium, High) so tables and plots follow that order instead of
# the alphabetical order a plain character column would give.
salary <- salary %>%
  mutate(
    salary.level = factor(
      case_when(
        salary >= 130000 ~ "High",
        salary >= 100000 ~ "Medium",
        salary < 100000 ~ "Low"
      ),
      levels = c("Low", "Medium", "High")
    )
  )

Task 6: Create a new binary variable “high.salary” with 2 levels (Low, High)

# Binary indicator of a high salary: 1 when salary is at least 130000,
# otherwise 0.
salary <- salary %>%
  mutate(high.salary = if_else(salary >= 130000, 1, 0))

Task 7: Create a new continuous variable “salary.aud” (1 USD = 1.53 AUD)

# Convert salary from USD to AUD at the fixed rate 1 USD = 1.53 AUD.
salary <- mutate(salary, salary.aud = 1.53 * salary)

Task 8: Select a subset that includes male professors in Discipline A who had a high salary

# Keep only the rows for male staff in discipline A whose salary.level is
# "High", then report the subset's dimensions (rows, columns).
Men.A.High <- salary %>%
  filter(sex == "Male" & discipline == "A" & salary.level == "High")
dim(Men.A.High)
## [1] 40  9

Task 9: Select 4 variables (ID, Rank, Sex, Salary)

# The data set has no identifier column, so derive ID from the row position
# before selecting the four requested variables in the order the task lists
# them (ID, rank, sex, salary).
var.select2 <- salary %>%
  mutate(ID = row_number()) %>%
  select(ID, rank, sex, salary)
names(var.select2)
## [1] "ID"     "rank"   "sex"    "salary"