# Load the professorial salaries data set.
# NOTE(review): this is an absolute, machine-specific Windows path; the script
# will only run where this exact file exists -- consider a project-relative path.
salary = read.csv("C:\\Thach\\UTS\\Teaching\\TRM\\Practical Data Analysis\\2024_Autumn semester\\Data\\Professorial Salaries.csv")

# Since R 4.0 read.csv() keeps text columns as character. Declare the
# categorical variables as factors so that downstream tools recognise them as
# categorical (epiDisplay::logistic.display() otherwise reports a character
# predictor as "(cont. var.)" rather than as a contrast between levels).
salary$Rank = factor(salary$Rank)
salary$Discipline = factor(salary$Discipline)
salary$Sex = factor(salary$Sex)

# Binary outcome: 1 when Salary is at least 130000, 0 otherwise
# (NA salaries stay NA).
salary$high.salary = ifelse(salary$Salary >= 130000, 1, 0)
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
# Descriptive table of all study variables, stratified by the high-salary
# indicator. table1() expects the stratifying term (right of '|') to be a
# factor with meaningful labels, so the 0/1 indicator is converted to a
# labelled factor for this call only; salary$high.salary itself is left
# numeric for the model fitted later.
table1(
  ~ Rank + Discipline + Yrs.since.phd + Yrs.service + NPubs + Ncits + Salary + Sex | high.salary,
  data = transform(
    salary,
    high.salary = factor(
      high.salary,
      levels = c(0, 1),
      labels = c("Salary < 130,000", "Salary >= 130,000")
    )
  )
)
## Warning in table1.formula(~Rank + Discipline + Yrs.since.phd + Yrs.service + :
## Terms to the right of '|' in formula 'x' define table columns and are expected
## to be factors with meaningful labels.
| | 0 (N=287) | 1 (N=110) | Overall (N=397) |
|---|---|---|---|
| Rank | | | |
| AssocProf | 64 (22.3%) | 0 (0%) | 64 (16.1%) |
| AsstProf | 67 (23.3%) | 0 (0%) | 67 (16.9%) |
| Prof | 156 (54.4%) | 110 (100%) | 266 (67.0%) |
| Discipline | | | |
| A | 140 (48.8%) | 41 (37.3%) | 181 (45.6%) |
| B | 147 (51.2%) | 69 (62.7%) | 216 (54.4%) |
| Yrs.since.phd | | | |
| Mean (SD) | 19.7 (13.1) | 29.2 (9.41) | 22.3 (12.9) |
| Median [Min, Max] | 17.0 [1.00, 56.0] | 29.0 [11.0, 56.0] | 21.0 [1.00, 56.0] |
| Yrs.service | | | |
| Mean (SD) | 15.4 (13.0) | 23.3 (11.2) | 17.6 (13.0) |
| Median [Min, Max] | 11.0 [0, 57.0] | 21.0 [2.00, 60.0] | 16.0 [0, 60.0] |
| NPubs | | | |
| Mean (SD) | 17.3 (13.0) | 20.3 (16.2) | 18.2 (14.0) |
| Median [Min, Max] | 13.0 [1.00, 69.0] | 16.0 [1.00, 69.0] | 13.0 [1.00, 69.0] |
| Ncits | | | |
| Mean (SD) | 38.7 (15.6) | 44.3 (19.4) | 40.2 (16.9) |
| Median [Min, Max] | 35.0 [1.00, 90.0] | 41.0 [1.00, 90.0] | 35.0 [1.00, 90.0] |
| Salary | | | |
| Mean (SD) | 98400 (16900) | 154000 (18900) | 114000 (30300) |
| Median [Min, Max] | 100000 [57800, 130000] | 149000 [131000, 232000] | 107000 [57800, 232000] |
| Sex | | | |
| Female | 34 (11.8%) | 5 (4.5%) | 39 (9.8%) |
| Male | 253 (88.2%) | 105 (95.5%) | 358 (90.2%) |
# Simple logistic regression: log-odds of a high salary (high.salary = 1)
# modelled as a function of Sex, using the binomial family.
m.1 = glm(
  formula = high.salary ~ Sex,
  data = salary,
  family = "binomial"
)
# Coefficient table (log-odds scale), deviances and AIC for the fitted model.
summary(m.1)
##
## Call:
## glm(formula = high.salary ~ Sex, family = "binomial", data = salary)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.9169 0.4790 -4.002 6.28e-05 ***
## SexMale 1.0375 0.4928 2.105 0.0353 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 468.60 on 396 degrees of freedom
## Residual deviance: 463.11 on 395 degrees of freedom
## AIC: 467.11
##
## Number of Fisher Scoring iterations: 4
library(epiDisplay)
## Warning: package 'epiDisplay' was built under R version 4.3.2
## Loading required package: foreign
## Loading required package: survival
## Loading required package: MASS
## Loading required package: nnet
# Report the fitted model m.1 on the odds-ratio scale: OR with 95% CI,
# Wald and likelihood-ratio P-values, log-likelihood and AIC.
logistic.display(logistic.model = m.1)
##
## Logistic regression predicting high.salary
##
## OR(95%CI) P(Wald's test) P(LR-test)
## Sex (cont. var.) 2.82 (1.07,7.41) 0.035 0.019
##
## Log-likelihood = -231.5529
## No. of observations = 397
## AIC value = 467.1058
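Interpretation: There is evidence (P = 0.035) that being male was associated with higher odds of receiving a high salary (salary ≥ 130,000): the odds for male professors were about 2.8 times those of their female counterparts (OR 2.82; 95% CI 1.07 to 7.41). In other words, the data are compatible with odds anywhere from 7% higher to 7.4 times as high.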