Task 2: Import Library

#install.packages(c("readxl", "tidyverse", "dplyr", "table1", "compareGroups", "ggplot2", "grid", "gridExtra", "GGally", "ggthemes", "DescTools", "simpleboot", "lmboot"), dependencies=T)

Task 3: Import Dataset

# Load the professorial salaries CSV into a data frame and preview its first rows.
# NOTE(review): the path is absolute and specific to one machine; confirm it
# exists before running this elsewhere.
salary <- read.csv(
  file = "D:\\OneDrive\\ANDREAS\\ACADEMICS_UNIVERSITY\\Year_4_2024\\Autumn\\42913\\CLasses\\R_Basic\\Professorial_Salaries.csv"
)
head(salary)
##   ID      Rank Discipline Yrs.since.phd Yrs.service  Sex NPubs Ncits Salary
## 1  1      Prof          B            19          18 Male    18    50 139750
## 2  2      Prof          B            20          16 Male     3    26 173200
## 3  3  AsstProf          B             4           3 Male     2    50  79750
## 4  4      Prof          B            45          39 Male    17    34 115000
## 5  5      Prof          B            40          41 Male    11    41 141500
## 6  6 AssocProf          B             6           6 Male     6    37  97000

Task 4: Describe the Data

# Number of rows and columns in the data frame.
dim(salary)
## [1] 397   9
# Column (variable) names of the data frame.
names(salary)
## [1] "ID"            "Rank"          "Discipline"    "Yrs.since.phd"
## [5] "Yrs.service"   "Sex"           "NPubs"         "Ncits"        
## [9] "Salary"
# Preview the first six rows of the data frame.
head(salary)
##   ID      Rank Discipline Yrs.since.phd Yrs.service  Sex NPubs Ncits Salary
## 1  1      Prof          B            19          18 Male    18    50 139750
## 2  2      Prof          B            20          16 Male     3    26 173200
## 3  3  AsstProf          B             4           3 Male     2    50  79750
## 4  4      Prof          B            45          39 Male    17    34 115000
## 5  5      Prof          B            40          41 Male    11    41 141500
## 6  6 AssocProf          B             6           6 Male     6    37  97000

Task 5: Create a new variable “salary.level”

Load the tidyverse library to use its commands (mutate, case_when, filter, select and the %>% pipe).

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Add salary.level, a three-way banding of Salary:
#   "Low"    : Salary below 100000
#   "Medium" : Salary from 100000 up to (but not including) 130000
#   "High"   : Salary of 130000 or more
# case_when() checks conditions in order, so each threshold only sees rows
# that did not match an earlier one; a missing Salary stays NA.
salary <- salary %>%
  mutate(
    salary.level = case_when(
      Salary < 100000 ~ "Low",
      Salary < 130000 ~ "Medium",
      Salary >= 130000 ~ "High"
    )
  )
head(salary)
##   ID      Rank Discipline Yrs.since.phd Yrs.service  Sex NPubs Ncits Salary
## 1  1      Prof          B            19          18 Male    18    50 139750
## 2  2      Prof          B            20          16 Male     3    26 173200
## 3  3  AsstProf          B             4           3 Male     2    50  79750
## 4  4      Prof          B            45          39 Male    17    34 115000
## 5  5      Prof          B            40          41 Male    11    41 141500
## 6  6 AssocProf          B             6           6 Male     6    37  97000
##   salary.level
## 1         High
## 2         High
## 3          Low
## 4       Medium
## 5         High
## 6          Low

Task 6: Create a new variable “high.salary” (1 if Salary is at least 130000, otherwise 0)

# Add high.salary, a numeric indicator: 1 when Salary is at least 130000,
# 0 when it is below; a missing Salary stays NA.
salary <- salary %>%
  mutate(high.salary = as.numeric(Salary >= 130000))
head(salary)
##   ID      Rank Discipline Yrs.since.phd Yrs.service  Sex NPubs Ncits Salary
## 1  1      Prof          B            19          18 Male    18    50 139750
## 2  2      Prof          B            20          16 Male     3    26 173200
## 3  3  AsstProf          B             4           3 Male     2    50  79750
## 4  4      Prof          B            45          39 Male    17    34 115000
## 5  5      Prof          B            40          41 Male    11    41 141500
## 6  6 AssocProf          B             6           6 Male     6    37  97000
##   salary.level high.salary
## 1         High           1
## 2         High           1
## 3          Low           0
## 4       Medium           0
## 5         High           1
## 6          Low           0

Task 7: Create a new variable “salary.aud” holding the professors’ salaries in AUD (exchange rate: 1 USD = 1.53 AUD).

# Add salary.aud: Salary multiplied by the 1.53 conversion factor.
salary <- mutate(salary, salary.aud = 1.53 * Salary)
head(salary)
##   ID      Rank Discipline Yrs.since.phd Yrs.service  Sex NPubs Ncits Salary
## 1  1      Prof          B            19          18 Male    18    50 139750
## 2  2      Prof          B            20          16 Male     3    26 173200
## 3  3  AsstProf          B             4           3 Male     2    50  79750
## 4  4      Prof          B            45          39 Male    17    34 115000
## 5  5      Prof          B            40          41 Male    11    41 141500
## 6  6 AssocProf          B             6           6 Male     6    37  97000
##   salary.level high.salary salary.aud
## 1         High           1   213817.5
## 2         High           1   264996.0
## 3          Low           0   122017.5
## 4       Medium           0   175950.0
## 5         High           1   216495.0
## 6          Low           0   148410.0

Task 8: Select the subset of participants who are male professors in the theoretical department (Discipline “A”) with high salaries.

# Keep only rows where Sex is "Male", Discipline is "A", and salary.level is
# "High"; all three conditions must hold for a row to be retained.
Men.A.High <- salary %>%
  filter(Sex == "Male" & Discipline == "A" & salary.level == "High")
head(Men.A.High)
##    ID Rank Discipline Yrs.since.phd Yrs.service  Sex NPubs Ncits Salary
## 1  27 Prof          A            35          23 Male    20    27 134885
## 2 110 Prof          A            40          31 Male    50    55 131205
## 3 117 Prof          A            30          29 Male    19    83 148500
## 4 127 Prof          A            28          26 Male     2    50 155500
## 5 135 Prof          A            35          25 Male    30    28 168635
## 6 136 Prof          A            20          18 Male    21    31 136000
##   salary.level high.salary salary.aud
## 1         High           1   206374.1
## 2         High           1   200743.6
## 3         High           1   227205.0
## 4         High           1   237915.0
## 5         High           1   258011.6
## 6         High           1   208080.0

Task 9: Create a new data set that includes 4 variables (ID, Rank, Sex and Salary)

# Build a reduced data set containing only the ID, Rank, Salary and Sex
# columns (in that order), then list its column names.
new_dataset <- select(
  salary,
  ID,
  Rank,
  Salary,
  Sex
)
names(new_dataset)
## [1] "ID"     "Rank"   "Salary" "Sex"
# Preview the first six rows of the reduced data set.
head(new_dataset)
##   ID      Rank Salary  Sex
## 1  1      Prof 139750 Male
## 2  2      Prof 173200 Male
## 3  3  AsstProf  79750 Male
## 4  4      Prof 115000 Male
## 5  5      Prof 141500 Male
## 6  6 AssocProf  97000 Male