# Read the professorial salaries CSV into a tibble with readr.
# NOTE(review): the path below is an absolute, machine-specific location;
# the script only runs where that file exists.
library("readr")
Professorial_Salaries <- read_csv(
  file = "C:/Users/24928614/Downloads/Professorial Salaries.csv"
)
## Rows: 397 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Rank, Discipline, Sex
## dbl (6): ID, Yrs.since.phd, Yrs.service, NPubs, Ncits, Salary
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows of the imported data.
head(x = Professorial_Salaries, n = 6L)
## # A tibble: 6 × 9
## ID Rank Discipline Yrs.since.phd Yrs.service Sex NPubs Ncits Salary
## <dbl> <chr> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 1 Prof B 19 18 Male 18 50 139750
## 2 2 Prof B 20 16 Male 3 26 173200
## 3 3 AsstProf B 4 3 Male 2 50 79750
## 4 4 Prof B 45 39 Male 17 34 115000
## 5 5 Prof B 40 41 Male 11 41 141500
## 6 6 AssocProf B 6 6 Male 6 37 97000
# install.packages("GGally")
library(GGally)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Keep only the columns to be plotted, then draw a pairwise plot matrix
# in which every panel is coloured by Sex.
vars <- Professorial_Salaries[, c(
  "Rank", "Discipline", "Yrs.since.phd", "Yrs.service",
  "NPubs", "Ncits", "Sex", "Salary"
)]
ggpairs(data = vars, mapping = aes(color = Sex))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#install.packages("table1")
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
# Descriptive table of the study population, stratified by Sex.
# No render.continuous override is passed, so continuous variables use
# table1's default summaries (Mean (SD) and Median [Min, Max] in the output).
table1(
  ~ Rank + Discipline + Yrs.since.phd + Yrs.service + NPubs + Ncits | Sex,
  data = Professorial_Salaries
)
| | Female (N=39) | Male (N=358) | Overall (N=397) |
|---|---|---|---|
| Rank | |||
| AssocProf | 10 (25.6%) | 54 (15.1%) | 64 (16.1%) |
| AsstProf | 11 (28.2%) | 56 (15.6%) | 67 (16.9%) |
| Prof | 18 (46.2%) | 248 (69.3%) | 266 (67.0%) |
| Discipline | |||
| A | 18 (46.2%) | 163 (45.5%) | 181 (45.6%) |
| B | 21 (53.8%) | 195 (54.5%) | 216 (54.4%) |
| Yrs.since.phd | |||
| Mean (SD) | 16.5 (9.78) | 22.9 (13.0) | 22.3 (12.9) |
| Median [Min, Max] | 17.0 [2.00, 39.0] | 22.0 [1.00, 56.0] | 21.0 [1.00, 56.0] |
| Yrs.service | |||
| Mean (SD) | 11.6 (8.81) | 18.3 (13.2) | 17.6 (13.0) |
| Median [Min, Max] | 10.0 [0, 36.0] | 18.0 [0, 60.0] | 16.0 [0, 60.0] |
| NPubs | |||
| Mean (SD) | 20.2 (14.4) | 17.9 (13.9) | 18.2 (14.0) |
| Median [Min, Max] | 18.0 [1.00, 50.0] | 13.0 [1.00, 69.0] | 13.0 [1.00, 69.0] |
| Ncits | |||
| Mean (SD) | 40.7 (16.2) | 40.2 (17.0) | 40.2 (16.9) |
| Median [Min, Max] | 36.0 [14.0, 70.0] | 35.0 [1.00, 90.0] | 35.0 [1.00, 90.0] |
## Task 4: Describe the study population with median [Q1, Q3] presented for
## continuous variables.
## NOTE(review): this heading previously said "mean (SD)", but the call below
## requests "Median [Q1, Q3]"; confirm the wording against the task sheet.
#install.packages("table1")
library(table1)
# Same stratified table as before, but render.continuous replaces the default
# continuous summaries with a single "Median [Q1, Q3]" row per variable.
table1(
  ~ Rank + Discipline + Yrs.since.phd + Yrs.service + NPubs + Ncits | Sex,
  data = Professorial_Salaries,
  render.continuous = c(. = "Median [Q1, Q3]")
)
| | Female (N=39) | Male (N=358) | Overall (N=397) |
|---|---|---|---|
| Rank | |||
| AssocProf | 10 (25.6%) | 54 (15.1%) | 64 (16.1%) |
| AsstProf | 11 (28.2%) | 56 (15.6%) | 67 (16.9%) |
| Prof | 18 (46.2%) | 248 (69.3%) | 266 (67.0%) |
| Discipline | |||
| A | 18 (46.2%) | 163 (45.5%) | 181 (45.6%) |
| B | 21 (53.8%) | 195 (54.5%) | 216 (54.4%) |
| Yrs.since.phd | |||
| Median [Q1, Q3] | 17.0 [10.0, 23.5] | 22.0 [12.0, 33.0] | 21.0 [12.0, 32.0] |
| Yrs.service | |||
| Median [Q1, Q3] | 10.0 [4.00, 17.5] | 18.0 [7.00, 27.0] | 16.0 [7.00, 27.0] |
| NPubs | |||
| Median [Q1, Q3] | 18.0 [11.0, 27.0] | 13.0 [8.00, 25.8] | 13.0 [8.00, 26.0] |
| Ncits | |||
| Median [Q1, Q3] | 36.0 [28.5, 54.5] | 35.0 [28.0, 50.0] | 35.0 [28.0, 50.0] |
# install.packages("compareGroups")
library(compareGroups)
# Compare the covariates between the Sex groups and print the resulting
# descriptive table, which includes an overall p-value per variable.
createTable(
  compareGroups(
    Sex ~ Rank + Discipline + Yrs.since.phd + Yrs.service + NPubs + Ncits,
    data = Professorial_Salaries
  )
)
##
## --------Summary descriptives table by 'Sex'---------
##
## _______________________________________________
## Female Male p.overall
## N=39 N=358
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## Rank: 0.014
## AssocProf 10 (25.6%) 54 (15.1%)
## AsstProf 11 (28.2%) 56 (15.6%)
## Prof 18 (46.2%) 248 (69.3%)
## Discipline: 1.000
## A 18 (46.2%) 163 (45.5%)
## B 21 (53.8%) 195 (54.5%)
## Yrs.since.phd 16.5 (9.78) 22.9 (13.0) <0.001
## Yrs.service 11.6 (8.81) 18.3 (13.2) <0.001
## NPubs 20.2 (14.4) 17.9 (13.9) 0.352
## Ncits 40.7 (16.2) 40.2 (17.0) 0.851
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Task 6: Post the exercise to your RPubs account