Done by Lucía García, Miguel Rodríguez, Diego Rivera and Adrián Macías
# The Salaries dataset from the carData package consists of nine-month academic salaries for Assistant Professors, Associate Professors and Professors in a college in the U.S. The data were collected as part of the ongoing effort of the college's administration to monitor salary differences between male and female faculty members.
# Global knitr and session options for the report.
# Echo every chunk's source code in the rendered document.
knitr::opts_chunk$set(echo = TRUE)
# Strongly favor fixed over scientific notation; print 3 significant digits.
options(scipen = 999, digits = 3)
# `repos` is documented as a (named) character vector of URLs; use HTTPS so
# packages are not downloaded over an unencrypted connection.
options(repos = c(CRAN = "https://cran.rstudio.com/"))
library(rmdformats)
## Warning: package 'rmdformats' was built under R version 4.3.2
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'ggplot2' was built under R version 4.3.2
## Warning: package 'tibble' was built under R version 4.3.2
## Warning: package 'tidyr' was built under R version 4.3.2
## Warning: package 'readr' was built under R version 4.3.2
## Warning: package 'purrr' was built under R version 4.3.2
## Warning: package 'dplyr' was built under R version 4.3.2
## Warning: package 'forcats' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.3.2
library(rstatix)
## Warning: package 'rstatix' was built under R version 4.3.2
##
## Attaching package: 'rstatix'
##
## The following object is masked from 'package:stats':
##
## filter
library(datarium)
## Warning: package 'datarium' was built under R version 4.3.2
library(ggplot2)
library(knitr)
set.seed(123)
library(report)
## Warning: package 'report' was built under R version 4.3.2
library(emmeans)
# Install the helper packages only when they are not already available, so
# re-running or re-knitting the script does not reinstall them every time.
needed_pkgs <- c("kableExtra", "qqplotr")
missing_pkgs <- needed_pkgs[
  !vapply(needed_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
## Installing packages into 'C:/Users/adria/AppData/Local/R/win-library/4.3'
## (as 'lib' is unspecified)
## package 'kableExtra' successfully unpacked and MD5 sums checked
## package 'qqplotr' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\adria\AppData\Local\Temp\RtmpAjZbVL\downloaded_packages
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.3.2
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(qqplotr)
## Warning: package 'qqplotr' was built under R version 4.3.2
##
## Attaching package: 'qqplotr'
##
## The following objects are masked from 'package:ggplot2':
##
## stat_qq_line, StatQqLine
# Name the package explicitly: carData is not attached yet at this point, so
# a bare data(Salaries) cannot find the data set and only emits a warning.
data(Salaries, package = "carData")
## Warning in data(Salaries): data set 'Salaries' not found
# Load the necessary library
library(carData)
## Warning: package 'carData' was built under R version 4.3.2
# Bring the Salaries data frame into the workspace
data("Salaries")
#1.Perform the 3-way Anova with and w/o interactions. Interpret the results: ## DESCRIPTIVE STATISTICS # Histogram:
# Ensure the three grouping variables are stored as factors
Salaries[c("rank", "sex", "discipline")] <- lapply(
  Salaries[c("rank", "sex", "discipline")],
  as.factor
)
# Salary histograms: one panel row per rank, one panel column per
# sex/discipline combination
ggplot(data = Salaries, mapping = aes(x = salary)) +
  geom_histogram(bins = 10, fill = "blue", color = "black", alpha = 0.7) +
  facet_grid(rank ~ sex * discipline) +
  theme_classic() +
  labs(title = "Salary Distribution", x = "Salary", y = "Frequency")
# Cell counts for every rank x sex x discipline combination
xtabs(
  formula = ~ rank + sex + discipline,
  data = Salaries
)
## , , discipline = A
##
## sex
## rank Female Male
## AsstProf 6 18
## AssocProf 4 22
## Prof 8 123
##
## , , discipline = B
##
## sex
## rank Female Male
## AsstProf 5 38
## AssocProf 6 32
## Prof 10 125
# Per-cell sample size, mean and standard deviation of salary
get_summary_stats(
  group_by(Salaries, rank, sex, discipline),
  salary,
  type = "mean_sd"
)
## # A tibble: 12 × 7
## rank discipline sex variable n mean sd
## <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl>
## 1 AsstProf A Female salary 6 72933. 5463.
## 2 AsstProf B Female salary 5 84190. 9792.
## 3 AsstProf A Male salary 18 74270. 4580.
## 4 AsstProf B Male salary 38 84647. 6900.
## 5 AssocProf A Female salary 4 72128. 6403.
## 6 AssocProf B Female salary 6 99436. 14086.
## 7 AssocProf A Male salary 22 85049. 10612.
## 8 AssocProf B Male salary 32 101622. 9608.
## 9 Prof A Female salary 8 109632. 15095.
## 10 Prof B Female salary 10 131836. 17504.
## 11 Prof A Male salary 123 120619. 28505.
## 12 Prof B Male salary 125 133518. 26514.
#' Drop within-group outliers from a data frame
#'
#' Groups `data` by `group_vars`, asks `identify_outliers()` which rows are
#' outliers on `outlier_var` within each group, and removes every row of
#' `data` that matches a flagged row on the grouping columns and on the
#' value of `outlier_var`.
#'
#' @param data Data frame to clean.
#' @param group_vars Character vector naming the grouping columns.
#' @param outlier_var Name (string) of the column screened for outliers.
#' @return `data` without the rows matched to flagged outliers.
remove_outliers <- function(data, group_vars, outlier_var) {
  # Columns used to match flagged rows back to the original data
  join_keys <- c(group_vars, outlier_var)
  flagged <- identify_outliers(
    group_by(data, across(all_of(group_vars))),
    !!sym(outlier_var)
  )
  anti_join(data, flagged, by = join_keys)
}
# Overwrite Salaries with the outlier-free version (outliers are judged
# within each rank x sex x discipline cell), then print the result
Salaries <- remove_outliers(
  data = Salaries,
  group_vars = c("rank", "sex", "discipline"),
  outlier_var = "salary"
)
Salaries
## rank discipline yrs.since.phd yrs.service sex salary
## 1 Prof B 19 18 Male 139750
## 2 Prof B 20 16 Male 173200
## 3 AsstProf B 4 3 Male 79750
## 4 Prof B 45 39 Male 115000
## 5 Prof B 40 41 Male 141500
## 6 AssocProf B 6 6 Male 97000
## 7 Prof B 30 23 Male 175000
## 8 Prof B 45 45 Male 147765
## 9 Prof B 21 20 Male 119250
## 10 Prof B 18 18 Female 129000
## 11 AssocProf B 12 8 Male 119800
## 12 AsstProf B 7 2 Male 79800
## 13 AsstProf B 1 1 Male 77700
## 14 AsstProf B 2 0 Male 78000
## 15 Prof B 20 18 Male 104800
## 16 Prof B 12 3 Male 117150
## 17 Prof B 19 20 Male 101000
## 18 Prof A 38 34 Male 103450
## 19 Prof A 37 23 Male 124750
## 20 Prof A 39 36 Female 137000
## 21 Prof A 31 26 Male 89565
## 22 Prof A 36 31 Male 102580
## 23 Prof A 34 30 Male 93904
## 24 Prof A 24 19 Male 113068
## 25 AssocProf A 13 8 Female 74830
## 26 Prof A 21 8 Male 106294
## 27 Prof A 35 23 Male 134885
## 28 AsstProf B 5 3 Male 82379
## 29 AsstProf B 11 0 Male 77000
## 30 Prof B 12 8 Male 118223
## 31 Prof B 20 4 Male 132261
## 32 AsstProf B 7 2 Male 79916
## 33 Prof B 13 9 Male 117256
## 34 AsstProf B 4 2 Male 80225
## 35 AsstProf B 4 2 Female 80225
## 36 AsstProf B 5 0 Female 77000
## 37 Prof B 22 21 Male 155750
## 38 AsstProf B 7 4 Male 86373
## 39 Prof B 41 31 Male 125196
## 40 AssocProf B 9 9 Male 100938
## 41 Prof B 23 2 Male 146500
## 42 AssocProf B 23 23 Male 93418
## 43 Prof B 40 27 Male 101299
## 44 Prof B 19 19 Male 94384
## 45 Prof B 25 15 Male 114778
## 46 Prof B 40 28 Male 98193
## 47 Prof B 23 19 Female 151768
## 48 Prof B 25 25 Female 140096
## 49 AsstProf B 1 1 Male 70768
## 50 Prof B 28 28 Male 126621
## 51 Prof B 12 11 Male 108875
## 52 AsstProf B 11 3 Female 74692
## 53 Prof B 16 9 Male 106639
## 54 AssocProf B 12 11 Male 103760
## 55 AssocProf B 14 5 Male 83900
## 56 Prof B 23 21 Male 117704
## 57 AssocProf B 9 8 Male 90215
## 58 AssocProf B 10 9 Male 100135
## 59 AsstProf B 8 3 Male 75044
## 60 AssocProf B 9 8 Male 90304
## 61 AsstProf B 3 2 Male 75243
## 62 Prof B 33 31 Male 109785
## 63 AssocProf B 11 11 Female 103613
## 64 AsstProf B 4 3 Male 68404
## 65 AssocProf B 9 8 Male 100522
## 66 Prof B 22 12 Male 101000
## 67 Prof B 35 31 Male 99418
## 68 Prof B 17 17 Female 111512
## 69 Prof B 28 36 Male 91412
## 70 Prof B 17 2 Male 126320
## 71 Prof B 45 45 Male 146856
## 72 Prof B 29 19 Male 100131
## 73 Prof B 35 34 Male 92391
## 74 Prof B 28 23 Male 113398
## 75 AsstProf B 8 3 Male 73266
## 76 Prof B 17 3 Male 150480
## 77 Prof B 26 19 Male 193000
## 78 AsstProf B 3 1 Male 86100
## 79 AsstProf B 6 2 Male 84240
## 80 Prof B 43 28 Male 150743
## 81 Prof B 17 16 Male 135585
## 82 Prof B 22 20 Male 144640
## 83 AsstProf B 6 2 Male 88825
## 84 Prof B 17 18 Female 122960
## 85 Prof B 15 14 Male 132825
## 86 Prof B 37 37 Male 152708
## 87 AsstProf B 2 2 Male 88400
## 88 Prof B 25 25 Male 172272
## 89 AssocProf B 9 7 Male 107008
## 90 AsstProf B 10 5 Female 97032
## 91 AssocProf B 10 7 Male 105128
## 92 AssocProf B 10 7 Male 105631
## 93 Prof B 38 38 Male 166024
## 94 Prof B 21 20 Male 123683
## 95 AsstProf B 4 0 Male 84000
## 96 AssocProf B 17 12 Male 95611
## 97 Prof B 13 7 Male 129676
## 98 Prof B 30 14 Male 102235
## 99 Prof B 41 26 Male 106689
## 100 Prof B 42 25 Male 133217
## 101 Prof B 28 23 Male 126933
## 102 Prof B 16 5 Male 153303
## 103 Prof B 20 14 Female 127512
## 104 AssocProf A 18 10 Male 83850
## 105 Prof A 31 28 Male 113543
## 106 AssocProf A 11 8 Male 82099
## 107 AssocProf A 10 8 Male 82600
## 108 AssocProf A 15 8 Male 81500
## 109 Prof A 40 31 Male 131205
## 110 Prof A 20 16 Male 112429
## 111 AssocProf A 19 16 Male 82100
## 112 AsstProf A 3 1 Male 72500
## 113 Prof A 37 37 Male 104279
## 114 Prof A 12 0 Female 105000
## 115 Prof A 21 9 Male 120806
## 116 Prof A 30 29 Male 148500
## 117 Prof A 39 36 Male 117515
## 118 AsstProf A 4 1 Male 72500
## 119 AsstProf A 5 3 Female 73500
## 120 Prof A 14 14 Male 115313
## 121 Prof A 32 32 Male 124309
## 122 Prof A 24 22 Male 97262
## 123 Prof A 24 22 Male 96614
## 124 Prof A 54 49 Male 78162
## 125 Prof A 28 26 Male 155500
## 126 AsstProf A 2 0 Female 72500
## 127 Prof A 32 30 Male 113278
## 128 AsstProf A 4 2 Male 73000
## 129 AssocProf A 11 9 Male 83001
## 130 Prof A 56 57 Male 76840
## 131 AssocProf A 10 8 Female 77500
## 132 AsstProf A 3 1 Female 72500
## 133 Prof A 35 25 Male 168635
## 134 Prof A 20 18 Male 136000
## 135 Prof A 16 14 Male 108262
## 136 Prof A 17 14 Male 105668
## 137 AssocProf A 10 7 Male 73877
## 138 Prof A 21 18 Male 152664
## 139 AssocProf A 15 10 Male 81500
## 140 Prof A 19 11 Male 106608
## 141 AsstProf B 3 3 Male 89942
## 142 Prof B 27 27 Male 112696
## 143 Prof B 28 28 Male 119015
## 144 AsstProf B 4 4 Male 92000
## 145 Prof B 27 27 Male 156938
## 146 Prof B 36 26 Female 144651
## 147 AsstProf B 4 3 Male 95079
## 148 Prof B 14 12 Male 128148
## 149 AsstProf B 4 4 Male 92000
## 150 Prof B 21 9 Male 111168
## 151 AssocProf B 12 10 Female 103994
## 152 AsstProf B 4 0 Male 92000
## 153 Prof B 21 21 Male 118971
## 154 AssocProf B 12 18 Male 113341
## 155 AsstProf B 1 0 Male 88000
## 156 AssocProf B 6 6 Male 95408
## 157 Prof B 15 16 Male 137167
## 158 AsstProf B 2 2 Male 89516
## 159 Prof B 26 19 Male 176500
## 160 AssocProf B 22 7 Male 98510
## 161 AsstProf B 3 3 Male 89942
## 162 AsstProf B 1 0 Male 88795
## 163 Prof B 21 8 Male 105890
## 164 Prof B 16 16 Male 167284
## 165 Prof B 18 19 Male 130664
## 166 AssocProf B 8 6 Male 101210
## 167 Prof B 25 18 Male 181257
## 168 AsstProf B 5 5 Male 91227
## 169 Prof B 19 19 Male 151575
## 170 Prof B 37 24 Male 93164
## 171 Prof B 20 20 Male 134185
## 172 AssocProf B 17 6 Male 105000
## 173 Prof B 28 25 Male 111751
## 174 AssocProf B 10 7 Male 95436
## 175 AssocProf B 13 9 Male 100944
## 176 Prof B 27 14 Male 147349
## 177 AsstProf B 3 3 Female 92000
## 178 Prof B 11 11 Male 142467
## 179 Prof B 18 5 Male 141136
## 180 AssocProf B 8 8 Male 100000
## 181 Prof B 26 22 Male 150000
## 182 Prof B 23 23 Male 101000
## 183 Prof B 33 30 Male 134000
## 184 AssocProf B 13 10 Female 103750
## 185 Prof B 18 10 Male 107500
## 186 AssocProf B 28 28 Male 106300
## 187 Prof B 25 19 Male 153750
## 188 Prof B 22 9 Male 180000
## 189 Prof B 43 22 Male 133700
## 190 Prof B 19 18 Male 122100
## 191 AssocProf B 19 19 Male 86250
## 192 AssocProf B 48 53 Male 90000
## 193 AssocProf B 9 7 Male 113600
## 194 AsstProf B 4 4 Male 92700
## 195 AsstProf B 4 4 Male 92000
## 196 Prof B 34 33 Male 189409
## 197 Prof B 38 22 Male 114500
## 198 AsstProf B 4 4 Male 92700
## 199 Prof B 40 40 Male 119700
## 200 Prof B 28 17 Male 160400
## 201 Prof B 17 17 Male 152500
## 202 Prof B 19 5 Male 165000
## 203 Prof B 21 2 Male 96545
## 204 Prof B 35 33 Male 162200
## 205 Prof B 18 18 Male 120000
## 206 AsstProf B 7 2 Male 91300
## 207 Prof B 20 20 Male 163200
## 208 AsstProf B 4 3 Male 91000
## 209 Prof B 39 39 Male 111350
## 210 Prof B 15 7 Male 128400
## 211 Prof B 26 19 Male 126200
## 212 AssocProf B 11 1 Male 118700
## 213 Prof B 16 11 Male 145350
## 214 Prof B 15 11 Male 146000
## 215 AssocProf B 29 22 Male 105350
## 216 Prof B 13 11 Male 119500
## 217 Prof B 21 21 Male 170000
## 218 Prof B 23 10 Male 145200
## 219 AssocProf B 13 6 Male 107150
## 220 Prof B 34 20 Male 129600
## 221 Prof A 38 35 Male 87800
## 222 Prof A 20 20 Male 122400
## 223 Prof A 16 11 Male 88175
## 224 Prof A 39 38 Male 133900
## 225 Prof A 29 27 Female 91000
## 226 AssocProf A 26 24 Female 73300
## 227 Prof A 38 19 Male 148750
## 228 Prof A 36 19 Female 117555
## 229 AsstProf A 8 3 Male 69700
## 230 Prof A 28 17 Male 81700
## 231 Prof A 25 25 Male 114000
## 232 Prof A 46 40 Male 77202
## 233 Prof A 19 6 Male 96200
## 234 AsstProf A 5 3 Male 69200
## 235 Prof A 31 30 Male 122875
## 236 Prof A 38 37 Male 102600
## 237 Prof A 23 23 Male 108200
## 238 Prof A 19 23 Male 84273
## 239 Prof A 17 11 Female 90450
## 240 Prof A 30 23 Male 91100
## 241 Prof A 21 18 Male 101100
## 242 Prof A 28 23 Male 128800
## 243 Prof A 39 39 Male 109000
## 244 Prof A 20 8 Male 102000
## 245 Prof A 31 12 Male 132000
## 246 AsstProf A 4 2 Female 77500
## 247 Prof A 28 7 Female 116450
## 248 AssocProf A 12 8 Male 83000
## 249 Prof A 22 22 Male 140300
## 250 AssocProf A 30 23 Male 74000
## 251 AsstProf A 9 3 Male 73800
## 252 Prof A 32 30 Male 92550
## 253 AssocProf A 41 33 Male 88600
## 254 Prof A 45 45 Male 107550
## 255 Prof A 31 26 Male 121200
## 256 Prof A 31 31 Male 126000
## 257 Prof A 37 35 Male 99000
## 258 Prof A 36 30 Male 134800
## 259 Prof A 43 43 Male 143940
## 260 Prof A 14 10 Male 104350
## 261 Prof A 47 44 Male 89650
## 262 Prof A 13 7 Male 103700
## 263 Prof A 42 40 Male 143250
## 264 AsstProf A 4 1 Male 73000
## 265 AsstProf A 8 4 Male 74000
## 266 AsstProf A 8 3 Female 78500
## 267 Prof A 12 6 Male 93000
## 268 Prof A 52 48 Male 107200
## 269 Prof A 31 27 Male 163200
## 270 Prof A 24 18 Male 107100
## 271 Prof A 46 46 Male 100600
## 272 Prof A 39 38 Male 136500
## 273 Prof A 37 27 Male 103600
## 274 Prof A 51 51 Male 57800
## 275 Prof A 45 43 Male 155865
## 276 AssocProf A 8 6 Male 88650
## 277 AssocProf A 49 49 Male 81800
## 278 Prof A 28 27 Male 115800
## 279 Prof A 29 27 Male 150500
## 280 AsstProf A 8 5 Male 74000
## 281 Prof A 33 7 Male 174500
## 282 Prof A 32 28 Male 168500
## 283 Prof A 39 9 Male 183800
## 284 Prof A 19 7 Male 107300
## 285 Prof A 40 36 Male 97150
## 286 Prof A 18 18 Male 126300
## 287 Prof A 17 11 Male 148800
## 288 Prof A 49 43 Male 72300
## 289 Prof A 39 36 Male 88600
## 290 Prof A 27 16 Male 127100
## 291 Prof A 28 13 Male 170500
## 292 Prof A 14 4 Male 105260
## 293 Prof A 46 44 Male 144050
## 294 Prof A 33 31 Male 111350
## 295 AsstProf A 7 4 Male 74500
## 296 Prof A 31 28 Male 122500
## 297 AsstProf A 5 0 Male 74000
## 298 Prof A 22 15 Male 166800
## 299 Prof A 20 7 Male 92050
## 300 Prof A 14 9 Male 108100
## 301 Prof A 29 19 Male 94350
## 302 Prof A 35 35 Male 100351
## 303 Prof A 22 6 Male 146800
## 304 AsstProf B 6 3 Male 84716
## 305 Prof B 46 45 Male 67559
## 306 Prof B 16 16 Male 134550
## 307 Prof B 16 15 Male 135027
## 308 Prof B 24 23 Male 104428
## 309 AssocProf B 9 9 Male 95642
## 310 Prof B 24 15 Female 161101
## 311 Prof B 30 31 Male 162221
## 312 AsstProf B 8 4 Male 84500
## 313 Prof B 23 15 Male 124714
## 314 Prof B 37 37 Male 151650
## 315 AssocProf B 10 10 Male 99247
## 316 Prof B 23 23 Male 134778
## 317 Prof B 49 60 Male 192253
## 318 Prof B 20 9 Male 116518
## 319 Prof B 18 10 Female 105450
## 320 Prof B 33 19 Male 145098
## 321 AssocProf B 19 6 Female 104542
## 322 Prof B 36 38 Male 151445
## 323 Prof B 35 23 Male 98053
## 324 Prof B 13 12 Male 145000
## 325 Prof B 32 25 Male 128464
## 326 Prof B 37 15 Male 137317
## 327 Prof B 13 11 Male 106231
## 328 Prof B 17 17 Female 124312
## 329 Prof B 38 38 Male 114596
## 330 Prof B 31 31 Male 162150
## 331 Prof B 32 35 Male 150376
## 332 Prof B 15 10 Male 107986
## 333 Prof B 41 27 Male 142023
## 334 Prof B 39 33 Male 128250
## 335 AsstProf B 4 3 Male 80139
## 336 Prof B 27 28 Male 144309
## 337 Prof B 56 49 Male 186960
## 338 Prof B 38 38 Male 93519
## 339 Prof B 26 27 Male 142500
## 340 Prof B 22 20 Male 138000
## 341 AsstProf B 8 1 Male 83600
## 342 Prof B 25 21 Male 145028
## 343 Prof A 49 40 Male 88709
## 344 Prof A 39 35 Male 107309
## 345 Prof A 28 14 Female 109954
## 346 AsstProf A 11 4 Male 78785
## 347 Prof A 14 11 Male 121946
## 348 Prof A 23 15 Female 109646
## 349 Prof A 30 30 Male 138771
## 350 AssocProf A 20 17 Male 81285
## 351 Prof A 43 40 Male 101036
## 352 Prof A 15 10 Male 115435
## 353 Prof A 35 30 Male 131950
## 354 Prof A 33 31 Male 134690
## 355 AssocProf A 13 8 Male 78182
## 356 Prof A 23 20 Male 110515
## 357 Prof A 12 7 Male 109707
## 358 Prof A 30 26 Male 136660
## 359 Prof A 27 19 Male 103275
## 360 Prof A 28 26 Male 103649
## 361 AsstProf A 4 1 Male 74856
## 362 AsstProf A 6 3 Male 77081
## 363 Prof A 38 38 Male 150680
## 364 AsstProf A 8 3 Male 75996
## 365 Prof A 27 23 Male 172505
## 366 AssocProf A 8 5 Male 86895
## 367 Prof A 44 44 Male 105000
## 368 Prof A 27 21 Male 125192
## 369 Prof A 15 9 Male 114330
## 370 Prof A 29 27 Male 139219
## 371 Prof A 29 15 Male 109305
## 372 Prof A 38 36 Male 119450
## 373 Prof A 33 18 Male 186023
## 374 Prof A 40 19 Male 166605
## 375 Prof A 30 19 Male 151292
## 376 Prof A 33 30 Male 103106
## 377 Prof A 31 19 Male 150564
## 378 Prof A 42 25 Male 101738
## 379 Prof A 25 15 Male 95329
# Shapiro-Wilk test of salary within every rank x sex x discipline cell
grouped_data_salaries <- group_by(Salaries, rank, sex, discipline)
normality_test_salaries <- grouped_data_salaries %>%
  shapiro_test(salary)
# Render the test results as a dark-themed table
table_salaries <- kable_material_dark(kbl(normality_test_salaries))
table_salaries
| rank | discipline | sex | variable | statistic | p |
|---|---|---|---|---|---|
| AsstProf | A | Female | salary | 0.813 | 0.103 |
| AsstProf | B | Female | salary | 0.889 | 0.354 |
| AsstProf | A | Male | salary | 0.953 | 0.581 |
| AsstProf | B | Male | salary | 0.941 | 0.046 |
| AssocProf | A | Female | salary | 0.976 | 0.703 |
| AssocProf | B | Female | salary | 0.916 | 0.514 |
| AssocProf | A | Male | salary | 0.899 | 0.079 |
| AssocProf | B | Male | salary | 0.976 | 0.698 |
| Prof | A | Female | salary | 0.934 | 0.549 |
| Prof | B | Female | salary | 0.974 | 0.923 |
| Prof | A | Male | salary | 0.967 | 0.005 |
| Prof | B | Male | salary | 0.986 | 0.218 |
# Colors for the QQ plot. The names must match the levels of `sex`
# ("Female"/"Male") exactly, otherwise scale_color_manual() cannot map them.
colors_salaries <- c("Male" = "blue", "Female" = "pink")
# QQ plot of salary, one panel per discipline (rows) and sex/rank (columns)
ggqqplot(
  Salaries,
  x = "salary",
  color = "sex",
  shape = "sex",
  fill = "sex",
  title = "QQ Plot of Salaries by Gender and Discipline",
  caption = "Data source: Salaries"
) +
  scale_color_manual(values = colors_salaries) +
  theme_dark() +
  theme(plot.title = element_text(hjust = 0.5)) +
  facet_grid(discipline ~ sex + rank, scales = "free")
# Levene's test of equal salary variance across all
# rank x sex x discipline cells
levene_test_result_salaries <- Salaries %>%
  levene_test(salary ~ rank * sex * discipline)
# Styled HTML table for the Levene's test result.
# TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
styled_table_salaries <- levene_test_result_salaries %>%
  kable("html") %>%
  kable_styling("striped", full_width = FALSE) %>%
  add_header_above(c(" " = 2, "Levene's Test" = 2)) %>%
  row_spec(0, bold = TRUE, color = "white", background = "#1a1a1a") %>%
  column_spec(1:4, bold = TRUE, color = "black", background = "#add8e6")
styled_table_salaries
| df1 | df2 | statistic | p |
|---|---|---|---|
| 11 | 367 | 10.9 | 0 |
# Full factorial linear model: salary explained by rank, sex, discipline
# and all of their interactions
model_salaries <- lm(
  formula = salary ~ rank * sex * discipline,
  data = Salaries
)
# Coefficient table and overall fit statistics
summary(model_salaries)
##
## Call:
## lm(formula = salary ~ rank * sex * discipline, data = Salaries)
##
## Residuals:
## Min 1Q Median 3Q Max
## -65169 -13128 -467 9034 67424
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 74900 9499 7.88 0.000000000000036
## rankAssocProf 310 15512 0.02 0.9841
## rankProf 34732 12109 2.87 0.0044
## sexMale -1106 10969 -0.10 0.9198
## disciplineB 9290 13434 0.69 0.4897
## rankAssocProf:sexMale 7954 17289 0.46 0.6457
## rankProf:sexMale 10072 13434 0.75 0.4539
## rankAssocProf:disciplineB 19475 21063 0.92 0.3558
## rankProf:disciplineB 12914 16792 0.77 0.4423
## sexMale:disciplineB 1563 14914 0.10 0.9166
## rankAssocProf:sexMale:disciplineB -11565 22986 -0.50 0.6152
## rankProf:sexMale:disciplineB -9638 18203 -0.53 0.5968
##
## (Intercept) ***
## rankAssocProf
## rankProf **
## sexMale
## disciplineB
## rankAssocProf:sexMale
## rankProf:sexMale
## rankAssocProf:disciplineB
## rankProf:disciplineB
## sexMale:disciplineB
## rankAssocProf:sexMale:disciplineB
## rankProf:sexMale:disciplineB
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21200 on 367 degrees of freedom
## Multiple R-squared: 0.467, Adjusted R-squared: 0.451
## F-statistic: 29.2 on 11 and 367 DF, p-value: <0.0000000000000002
# Diagnostic plots for the linear model, drawn on a 2 x 2 grid.
# par() returns the previous settings; restore them afterwards so later
# base-graphics plots are not squeezed into the same grid.
old_par <- par(mfrow = c(2, 2))
plot(model_salaries)
par(old_par)
# Store the model's predicted salary for every row of Salaries
Salaries$predicted_salary <- predict(model_salaries, newdata = Salaries)
# Perform 3-way ANOVA without interactions: main effects only, so the terms
# are joined with `+` (`*` would also add every two- and three-way
# interaction and reproduce the full factorial model).
anova_without_interactions <- aov(salary ~ rank + sex + discipline, data = Salaries)
# Display the ANOVA table
summary(anova_without_interactions)
## Df Sum Sq Mean Sq F value Pr(>F)
## rank 2 123716094019 61858047010 137.11 < 0.0000000000000002
## sex 1 306883104 306883104 0.68 0.41
## discipline 1 19797898739 19797898739 43.88 0.00000000012
## rank:sex 2 187252599 93626300 0.21 0.81
## rank:discipline 2 591968714 295984357 0.66 0.52
## sex:discipline 1 265245132 265245132 0.59 0.44
## rank:sex:discipline 2 157391053 78695526 0.17 0.84
## Residuals 367 165579680433 451170791
##
## rank ***
## sex
## discipline ***
## rank:sex
## rank:discipline
## sex:discipline
## rank:sex:discipline
## Residuals
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 3-way ANOVA with every two-way interaction and the three-way interaction
# written out term by term
anova_with_interactions <- aov(
  salary ~ rank + sex + discipline +
    rank:sex + rank:discipline + sex:discipline +
    rank:sex:discipline,
  data = Salaries
)
# Show the resulting ANOVA table
summary(anova_with_interactions)
## Df Sum Sq Mean Sq F value Pr(>F)
## rank 2 123716094019 61858047010 137.11 < 0.0000000000000002
## sex 1 306883104 306883104 0.68 0.41
## discipline 1 19797898739 19797898739 43.88 0.00000000012
## rank:sex 2 187252599 93626300 0.21 0.81
## rank:discipline 2 591968714 295984357 0.66 0.52
## sex:discipline 1 265245132 265245132 0.59 0.44
## rank:sex:discipline 2 157391053 78695526 0.17 0.84
## Residuals 367 165579680433 451170791
##
## rank ***
## sex
## discipline ***
## rank:sex
## rank:discipline
## sex:discipline
## rank:sex:discipline
## Residuals
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Interpretations: Rank and discipline have a significant effect on salary, while sex and all of the interactions are not significant. The highly significant p-values (well below 0.05) for rank and discipline indicate that these factors explain a significant share of the variation in salary. The non-significant p-values for sex and for every interaction term (all p > 0.4) indicate that there is no evidence that these terms affect salary in this analysis.
#———————————————————————————–
#2. Can years since doctorate (yrs.since.phd), length of service (yrs.service) #be significant as covariates?
#In the context of the ANCOVA model we ran, the null hypotheses (H_0) and #alternative hypotheses (H_A) for each of the predictors can be stated as #follows:
#·Years since Ph.D. (yrs.since.phd): #H_0: The number of years since earning a Ph.D. has no effect on the salary. #H_A: The number of years since earning a Ph.D. has a significant effect #on the salary.
#·Years of service (yrs.service): #H_0: The length of service at the college has no effect on the salary. #H_A: The length of service at the college has a significant effect #on the salary.
#·Rank: #H_0: The rank of a faculty member has no effect on the salary. #H_A: The rank of a faculty member has a significant effect on the salary.
# Make sure rank is a factor before modelling
Salaries[["rank"]] <- as.factor(Salaries[["rank"]])
# ANCOVA: the two covariates (yrs.since.phd, yrs.service) enter the
# formula ahead of the rank factor
model <- aov(
  formula = salary ~ yrs.since.phd + yrs.service + rank,
  data = Salaries
)
# ANOVA table used to judge the significance of the covariates
summary(model)
## Df Sum Sq Mean Sq F value Pr(>F)
## yrs.since.phd 1 50333386805 50333386805 101.42 <0.0000000000000002 ***
## yrs.service 1 2786371451 2786371451 5.61 0.018 *
## rank 2 71875876270 35937938135 72.42 <0.0000000000000002 ***
## Residuals 374 185606779267 496274811
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Based on the summary of the ANCOVA model, here are the conclusions:
#·The p-value for yrs.since.phd is less than 2e-16, which is much smaller #than the common significance level of 0.05. Therefore, we reject the null #hypothesis and conclude that the number of years since earning a Ph.D. has #a significant effect on the salary.
#·The p-value for yrs.service is 0.018, which is less than the common significance level of 0.05. Therefore, we reject the null hypothesis and conclude that the length of service at the college has a significant effect on the salary.
#·The p-value for rank is less than 2e-16, which is much smaller than the #common significance level of 0.05. Therefore, we reject the null hypothesis #and conclude that the rank of a faculty member has a significant effect on #the salary.
#In conclusion, all three variables - yrs.since.phd, yrs.service, and #rank - appear to be significant predictors of academic salary in this dataset.
# Scatter plot for Years since Ph.D. vs Salary
plot(
  Salaries$yrs.since.phd, Salaries$salary,
  main = "Years since Ph.D. vs Salary",
  xlab = "Years since Ph.D.", ylab = "Salary"
)
# Scatter plot for Years of service vs Salary
plot(
  Salaries$yrs.service, Salaries$salary,
  main = "Years of service vs Salary",
  xlab = "Years of service", ylab = "Salary"
)
# Interaction plot (not a boxplot): salary against years since Ph.D., with
# one trace per rank. The legend is labelled explicitly so it does not show
# the raw expression `Salaries$rank`.
interaction.plot(
  Salaries$yrs.since.phd, Salaries$rank, Salaries$salary,
  main = "Interaction Plot",
  xlab = "Years since Ph.D.", ylab = "Salary",
  trace.label = "Rank"
)
#3.Is there any significant difference in years since PhD (yrs.since.phd) and seniority (yrs.service) of different rank professors?
#Null Hypothesis (H_0): There is no significant difference in the years since PhD (yrs.since.phd) and seniority (yrs.service) among professors of different ranks. That is, the mean vector of (yrs.since.phd, yrs.service) is the same for all three ranks.
#Alternative Hypothesis (H_A): There is a significant difference in the years since PhD (yrs.since.phd) and seniority (yrs.service) among professors of different ranks. That is, the mean vector of (yrs.since.phd, yrs.service) differs for at least one rank.
# MANOVA: years since PhD and years of service, taken jointly, compared
# across ranks
manova_model <- manova(
  cbind(yrs.since.phd, yrs.service) ~ rank,
  data = Salaries
)
summary_result_manova <- summary(manova_model)
# A small p-value for rank indicates the ranks differ on these two variables
print(summary_result_manova)
## Df Pillai approx F num Df den Df Pr(>F)
## rank 2 0.495 61.9 4 752 <0.0000000000000002 ***
## Residuals 376
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1