# Read the professorial salaries data set into the data frame `salary`.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file exists.
salary <- read.csv(
  file = "C:\\Thach\\UTS\\Teaching\\TRM\\Practical Data Analysis\\2024_Autumn semester\\Data\\Professorial Salaries.csv"
)
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
# Descriptive summary of the listed variables, stratified by Sex
# (the term after `|` is the stratification variable).
table1(
  ~ Rank + Discipline + Yrs.since.phd + Yrs.service + NPubs + Ncits + Salary | Sex,
  data = salary
)
 | Female (N=39) | Male (N=358) | Overall (N=397) |
---|---|---|---|
Rank | |||
AssocProf | 10 (25.6%) | 54 (15.1%) | 64 (16.1%) |
AsstProf | 11 (28.2%) | 56 (15.6%) | 67 (16.9%) |
Prof | 18 (46.2%) | 248 (69.3%) | 266 (67.0%) |
Discipline | |||
A | 18 (46.2%) | 163 (45.5%) | 181 (45.6%) |
B | 21 (53.8%) | 195 (54.5%) | 216 (54.4%) |
Yrs.since.phd | |||
Mean (SD) | 16.5 (9.78) | 22.9 (13.0) | 22.3 (12.9) |
Median [Min, Max] | 17.0 [2.00, 39.0] | 22.0 [1.00, 56.0] | 21.0 [1.00, 56.0] |
Yrs.service | |||
Mean (SD) | 11.6 (8.81) | 18.3 (13.2) | 17.6 (13.0) |
Median [Min, Max] | 10.0 [0, 36.0] | 18.0 [0, 60.0] | 16.0 [0, 60.0] |
NPubs | |||
Mean (SD) | 20.2 (14.4) | 17.9 (13.9) | 18.2 (14.0) |
Median [Min, Max] | 18.0 [1.00, 50.0] | 13.0 [1.00, 69.0] | 13.0 [1.00, 69.0] |
Ncits | |||
Mean (SD) | 40.7 (16.2) | 40.2 (17.0) | 40.2 (16.9) |
Median [Min, Max] | 36.0 [14.0, 70.0] | 35.0 [1.00, 90.0] | 35.0 [1.00, 90.0] |
Salary | |||
Mean (SD) | 101000 (26000) | 115000 (30400) | 114000 (30300) |
Median [Min, Max] | 104000 [62900, 161000] | 108000 [57800, 232000] | 107000 [57800, 232000] |
# Pearson chi-squared test of independence between academic rank and sex.
# The result is stored so it can be inspected later, then printed.
rank.sex <- chisq.test(x = salary$Rank, y = salary$Sex)
print(rank.sex)
##
## Pearson's Chi-squared test
##
## data: salary$Rank and salary$Sex
## X-squared = 8.5259, df = 2, p-value = 0.01408
Interpretation: there is evidence (P = 0.014) that the distribution of academic ranks differed between male and female academics.
#install.packages("chisq.posthoc.test")
library(chisq.posthoc.test)
## Warning: package 'chisq.posthoc.test' was built under R version 4.3.3
# Build the sex-by-rank contingency table directly from the data instead of
# retyping the counts by hand, so it cannot drift out of sync with `salary`.
# Rows are sex and columns are rank; the named arguments become the dimnames
# headers ("sex", "rank") shown when the table is printed.
chisq.tab <- table(sex = salary$Sex, rank = salary$Rank)
chisq.tab
## rank
## sex AssocProf AsstProf Prof
## Female 10 11 18
## Male 54 56 248
# Post-hoc analysis of the chi-squared test: standardized residuals per cell
# with multiplicity-adjusted p-values. "bonferroni" is the package default,
# written out here so the adjustment method is visible to the reader.
chisq.posthoc.test(chisq.tab, method = "bonferroni")
## Dimension Value AssocProf AsstProf Prof
## 1 Female Residuals 1.702577 1.989100 -2.915939
## 2 Female p values 0.531883 0.280141 0.021277
## 3 Male Residuals -1.702577 -1.989100 2.915939
## 4 Male p values 0.531883 0.280141 0.021277
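Interpretation: after Bonferroni adjustment, there is evidence (P = 0.02) that the proportion of full professors differed between males and females (46.2% of females vs 69.3% of males were full professors); whereas there is no evidence that the proportions of assistant and associate professors differed between males and females (P = 0.28 and 0.53, respectively).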
# Restrict the data to associate professors. which() discards rows whose
# Rank is NA, matching what subset() does with a missing condition.
assoc <- salary[which(salary$Rank == "AssocProf"), , drop = FALSE]
# Rows x columns of the subset, as a quick sanity check.
dim(assoc)
## [1] 64 9
# Same descriptive summary as for the full cohort, now limited to the
# associate-professor subset and again stratified by Sex.
table1(
  ~ Rank + Discipline + Yrs.since.phd + Yrs.service + NPubs + Ncits + Salary | Sex,
  data = assoc
)
 | Female (N=10) | Male (N=54) | Overall (N=64) |
---|---|---|---|
Rank | |||
AssocProf | 10 (100%) | 54 (100%) | 64 (100%) |
Discipline | |||
A | 4 (40.0%) | 22 (40.7%) | 26 (40.6%) |
B | 6 (60.0%) | 32 (59.3%) | 38 (59.4%) |
Yrs.since.phd | |||
Mean (SD) | 15.5 (5.80) | 15.4 (10.2) | 15.5 (9.65) |
Median [Min, Max] | 13.0 [10.0, 26.0] | 11.5 [6.00, 49.0] | 12.0 [6.00, 49.0] |
Yrs.service | |||
Mean (SD) | 11.5 (6.26) | 12.0 (10.7) | 12.0 (10.1) |
Median [Min, Max] | 9.50 [6.00, 24.0] | 8.00 [1.00, 53.0] | 8.00 [1.00, 53.0] |
NPubs | |||
Mean (SD) | 13.8 (11.5) | 19.6 (13.7) | 18.7 (13.5) |
Median [Min, Max] | 11.0 [1.00, 38.0] | 16.0 [1.00, 50.0] | 16.0 [1.00, 50.0] |
Ncits | |||
Mean (SD) | 42.9 (14.9) | 41.9 (15.8) | 42.0 (15.6) |
Median [Min, Max] | 47.5 [19.0, 60.0] | 36.0 [14.0, 83.0] | 36.5 [14.0, 83.0] |
Salary | |||
Mean (SD) | 88500 (18000) | 94900 (12900) | 93900 (13800) |
Median [Min, Max] | 90600 [62900, 110000] | 95600 [70000, 126000] | 95600 [62900, 126000] |
As the expected number of female associate professors in the Theoretical discipline (Discipline = A) is 10 × 26 / 64 = 4.06 (observed number = 4), which is less than 5, a Fisher’s exact test is used instead of the chi-squared test.
# Fisher's exact test of association between sex and discipline among
# associate professors; the result is stored and then printed.
fisher <- fisher.test(x = assoc$Sex, y = assoc$Discipline)
print(fisher)
##
## Fisher's Exact Test for Count Data
##
## data: assoc$Sex and assoc$Discipline
## p-value = 1
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.1795097 4.6576966
## sample estimates:
## odds ratio
## 0.9701586
Interpretation: there is no evidence (P ≈ 1.0) that the distribution of disciplines differed between male and female associate professors.