# Load the income data set from CSV; the first row of the file supplies the
# column names (id, TimeSincePhD, NPubs, Sex, Citations, Salary).
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# script only runs where that exact folder exists; consider a
# project-relative path.
income <- read.csv(
  "C:\\VN trips\\VN trip 2 (Sept 2022)\\Can Tho 2022\\Income and PhDs.csv",
  header = TRUE # spelled out: `T` is an ordinary variable and can be reassigned
)
# Preview the first six rows to confirm the columns were parsed as expected.
head(income)
## id TimeSincePhD NPubs Sex Citations Salary
## 1 1 3 18 1 50 51876
## 2 2 6 3 1 26 54511
## 3 3 3 2 1 50 53425
## 4 4 8 17 0 34 61683
## 5 5 9 11 1 41 52926
## 6 6 6 6 0 37 47034
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
# Descriptive statistics for the four numeric measures, with one column per
# Sex code and an overall column. Sex is stored as 0/1, so it is converted to
# a factor to serve as the stratifying variable.
# NOTE(review): the rendered summary reports a minimum Salary of 6330 in one
# group, far below the other group's minimum of 37900 - possibly a data-entry
# error; confirm against the raw file.
table1(
  ~ TimeSincePhD + NPubs + Citations + Salary | as.factor(Sex),
  data = income
)
| | 0 (N=35) | 1 (N=27) | Overall (N=62) |
---|---|---|---|
TimeSincePhD | |||
Mean (SD) | 7.57 (4.99) | 5.78 (2.91) | 6.79 (4.28) |
Median [Min, Max] | 6.00 [1.00, 21.0] | 5.00 [2.00, 16.0] | 6.00 [1.00, 21.0] |
NPubs | |||
Mean (SD) | 20.1 (15.4) | 15.7 (11.8) | 18.2 (14.0) |
Median [Min, Max] | 17.0 [1.00, 69.0] | 12.0 [2.00, 50.0] | 13.0 [1.00, 69.0] |
Citations | |||
Mean (SD) | 42.5 (18.9) | 37.3 (14.5) | 40.2 (17.2) |
Median [Min, Max] | 40.0 [1.00, 90.0] | 34.0 [14.0, 83.0] | 35.0 [1.00, 90.0] |
Salary | |||
Mean (SD) | 56500 (10800) | 50600 (11700) | 53900 (11500) |
Median [Min, Max] | 55600 [37900, 83500] | 52500 [6330, 74300] | 53500 [6330, 83500] |
library("tidyverse")
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Build a labelled copy of the data: `gender` spells out the numeric Sex code
# (0 -> "Male", 1 -> "Female"). A Sex value that matches neither rule is left
# as NA by case_when().
income_n <- mutate(
  income,
  gender = case_when(
    Sex == 1 ~ "Female",
    Sex == 0 ~ "Male"
  )
)
# Same descriptive table as the Sex-stratified one, now stratified by the text
# `gender` label so the column headers read "Female"/"Male" instead of 1/0.
table1(
  ~ TimeSincePhD + NPubs + Citations + Salary | gender,
  data = income_n
)
| | Female (N=27) | Male (N=35) | Overall (N=62) |
---|---|---|---|
TimeSincePhD | |||
Mean (SD) | 5.78 (2.91) | 7.57 (4.99) | 6.79 (4.28) |
Median [Min, Max] | 5.00 [2.00, 16.0] | 6.00 [1.00, 21.0] | 6.00 [1.00, 21.0] |
NPubs | |||
Mean (SD) | 15.7 (11.8) | 20.1 (15.4) | 18.2 (14.0) |
Median [Min, Max] | 12.0 [2.00, 50.0] | 17.0 [1.00, 69.0] | 13.0 [1.00, 69.0] |
Citations | |||
Mean (SD) | 37.3 (14.5) | 42.5 (18.9) | 40.2 (17.2) |
Median [Min, Max] | 34.0 [14.0, 83.0] | 40.0 [1.00, 90.0] | 35.0 [1.00, 90.0] |
Salary | |||
Mean (SD) | 50600 (11700) | 56500 (10800) | 53900 (11500) |
Median [Min, Max] | 52500 [6330, 74300] | 55600 [37900, 83500] | 53500 [6330, 83500] |
library(compareGroups)
# Compare the four measures between the gender groups; the printed table adds
# a `p.overall` column for the between-group comparison.
# NOTE(review): compareGroups() is called with its defaults, which presumably
# treat every variable as normally distributed - confirm this is appropriate
# given the very low Salary minimum (6330) in the descriptive tables.
createTable(
  compareGroups(
    gender ~ TimeSincePhD + NPubs + Citations + Salary,
    data = income_n
  )
)
##
## --------Summary descriptives table by 'gender'---------
##
## __________________________________________________
## Female Male p.overall
## N=27 N=35
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## TimeSincePhD 5.78 (2.91) 7.57 (4.99) 0.082
## NPubs 15.7 (11.8) 20.1 (15.4) 0.203
## Citations 37.3 (14.5) 42.5 (18.9) 0.232
## Salary 50613 (11666) 56510 (10777) 0.046
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Pairwise plot matrix of every column in income_n, coloured by gender.
# NOTE(review): this passes id, Sex and gender itself along with the four
# measures. Sex is constant within each gender group, which is presumably what
# triggers the "standard deviation is zero" warnings from cor().
ggpairs(income_n, mapping = aes(color = gender))
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Keep only the four measures and the grouping label, leaving out id and Sex,
# then draw the pairwise plot matrix coloured by gender on that subset.
vars <- income_n[c(
  "TimeSincePhD",
  "NPubs",
  "Citations",
  "Salary",
  "gender"
)]
ggpairs(vars, mapping = aes(color = gender))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.