salary = read.csv("C:\\Thach\\UTS\\Teaching\\TRM\\Practical Data Analysis\\2024_Autumn semester\\Data\\Professorial Salaries.csv")
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
set.seed(1234)
index = createDataPartition(salary$Salary, p =0.75, list = FALSE)
training = salary[index, ]
dim(training)
## [1] 300 9
testing = salary[-index, ]
dim(testing)
## [1] 97 9
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
table1(~ Rank + Discipline + Yrs.since.phd + Yrs.service + NPubs + Ncits + Salary + Sex, data = training)
Overall (N=300) |
|
---|---|
Rank | |
AssocProf | 50 (16.7%) |
AsstProf | 50 (16.7%) |
Prof | 200 (66.7%) |
Discipline | |
A | 137 (45.7%) |
B | 163 (54.3%) |
Yrs.since.phd | |
Mean (SD) | 22.0 (12.9) |
Median [Min, Max] | 20.5 [1.00, 56.0] |
Yrs.service | |
Mean (SD) | 17.7 (12.9) |
Median [Min, Max] | 17.0 [0, 57.0] |
NPubs | |
Mean (SD) | 18.0 (14.0) |
Median [Min, Max] | 13.0 [1.00, 69.0] |
Ncits | |
Mean (SD) | 39.9 (17.6) |
Median [Min, Max] | 35.0 [1.00, 90.0] |
Salary | |
Mean (SD) | 114000 (29800) |
Median [Min, Max] | 107000 [57800, 232000] |
Sex | |
Female | 24 (8.0%) |
Male | 276 (92.0%) |
table1(~ Rank + Discipline + Yrs.since.phd + Yrs.service + NPubs + Ncits + Salary + Sex, data = testing)
Overall (N=97) |
|
---|---|
Rank | |
AssocProf | 14 (14.4%) |
AsstProf | 17 (17.5%) |
Prof | 66 (68.0%) |
Discipline | |
A | 44 (45.4%) |
B | 53 (54.6%) |
Yrs.since.phd | |
Mean (SD) | 23.2 (13.0) |
Median [Min, Max] | 22.0 [3.00, 56.0] |
Yrs.service | |
Mean (SD) | 17.2 (13.3) |
Median [Min, Max] | 14.0 [0, 60.0] |
NPubs | |
Mean (SD) | 18.7 (13.9) |
Median [Min, Max] | 14.0 [1.00, 50.0] |
Ncits | |
Mean (SD) | 41.2 (14.6) |
Median [Min, Max] | 37.0 [14.0, 83.0] |
Salary | |
Mean (SD) | 113000 (31900) |
Median [Min, Max] | 107000 [62900, 206000] |
Sex | |
Female | 15 (15.5%) |
Male | 82 (84.5%) |
library(BMA)
## Loading required package: survival
##
## Attaching package: 'survival'
## The following object is masked from 'package:caret':
##
## cluster
## Loading required package: leaps
## Loading required package: robustbase
##
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
##
## heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.7-4)
xvars = training[, c("Rank", "Discipline", "Yrs.since.phd", "Yrs.service", "Sex", "NPubs", "Ncits")]
yvar = training[,c("Salary")]
bma = bicreg(x = xvars, y = yvar, strict = FALSE, OR = 20)
summary(bma)
##
## Call:
## bicreg(x = xvars, y = yvar, strict = FALSE, OR = 20)
##
##
## 7 models were selected
## Best 5 models (cumulative posterior probability = 0.9347 ):
##
## p!=0 EV SD model 1 model 2 model 3
## Intercept 100.0 8.833e+04 4376.20 88094.11 91372.51 86697.82
## RankAsstProf 100.0 -1.653e+04 4677.07 -16064.89 -18293.10 -16414.39
## RankProf 100.0 3.256e+04 3896.77 31938.49 34788.76 32340.00
## DisciplineB 100.0 1.315e+04 2697.84 13227.64 12527.42 13644.18
## Yrs.since.phd 17.8 9.058e+01 254.77 . . 648.48
## Yrs.service 33.1 -1.546e+02 285.53 . -249.58 -751.30
## SexMale 3.8 1.281e+02 1153.12 . . .
## NPubs 3.1 -7.687e-01 17.20 . . .
## Ncits 4.5 3.017e+00 21.15 . . .
##
## nVar 3 4 5
## r2 0.427 0.434 0.443
## BIC -149.79 -147.75 -147.22
## post prob 0.521 0.187 0.144
## model 4 model 5
## Intercept 85234.76 85203.55
## RankAsstProf -15556.44 -15926.67
## RankProf 32013.72 31688.22
## DisciplineB 13319.46 13128.83
## Yrs.since.phd . .
## Yrs.service . .
## SexMale . 3356.59
## NPubs . .
## Ncits 67.03 .
##
## nVar 4 4
## r2 0.428 0.428
## BIC -144.90 -144.57
## post prob 0.045 0.038
imageplot.bma(bma)
m1 = lm(Salary ~ Rank + Discipline, data = training)
summary(m1)
##
## Call:
## lm(formula = Salary ~ Rank + Discipline, data = training)
##
## Residuals:
## Min 1Q Median 3Q Max
## -65701 -13823 -778 11321 98285
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 88094 3632 24.258 < 2e-16 ***
## RankAsstProf -16065 4535 -3.542 0.000461 ***
## RankProf 31939 3608 8.852 < 2e-16 ***
## DisciplineB 13228 2665 4.963 1.17e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22670 on 296 degrees of freedom
## Multiple R-squared: 0.4267, Adjusted R-squared: 0.4209
## F-statistic: 73.44 on 3 and 296 DF, p-value: < 2.2e-16
par(mfrow = c(2,2))
plot(m1)