options(scipen=999)
library(table1, quietly=TRUE)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
library(DescTools, quietly=TRUE)
library(ggplot2, quietly=TRUE)
library(gridExtra, quietly=TRUE)
library(ggExtra, quietly=TRUE)
library(plyr, quietly=T)
library(dplyr, quietly=T)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggthemes, quietly=T)

Reading data

# Load the 2019 Investigator Grant outcomes spreadsheet
dat <- read.csv(
  "~/Dropbox/NHMRC/NHMRC 2019/Investigator Grant/2019 Investigator Outcomes.csv"
)

# Split the awards by scheme: Leadership vs. the two EL levels (EL1, EL2)
leaders <- subset(dat, Type == "Leadership")
el <- subset(dat, Type %in% c("EL1", "EL2"))

Descriptive analyses

# Tally grants per scheme type, then draw a bar chart with the count printed
# just above each bar
df <- dat %>%
  group_by(Type) %>%
  summarise(counts = n())
p <- ggplot(df, aes(x = Type, y = counts)) +
  geom_col(fill = "#0073C2FF") +
  geom_text(aes(label = counts), vjust = -0.3)
p + labs(x = "Type", y = "Count")

# Cross-tabulate host institution against grant type (count and column %),
# with an overall column
table1(~ Institution | Type, data = dat)
EL1
(n=86)
EL2
(n=43)
Leadership
(n=117)
Overall
(n=246)
Institution
Australian National University 3 (3.5%) 2 (4.7%) 0 (0%) 5 (2.0%)
Baker Heart and Diabetes Institute 1 (1.2%) 0 (0%) 2 (1.7%) 3 (1.2%)
Bond University 0 (0%) 0 (0%) 1 (0.9%) 1 (0.4%)
Burnet Institute 1 (1.2%) 0 (0%) 1 (0.9%) 2 (0.8%)
Centre for Eye Research Australia 1 (1.2%) 0 (0%) 1 (0.9%) 2 (0.8%)
Curtin University 2 (2.3%) 2 (4.7%) 1 (0.9%) 5 (2.0%)
Deakin University 2 (2.3%) 1 (2.3%) 2 (1.7%) 5 (2.0%)
Edith Cowan University 1 (1.2%) 0 (0%) 0 (0%) 1 (0.4%)
Flinders University 1 (1.2%) 0 (0%) 0 (0%) 1 (0.4%)
Griffith University 0 (0%) 1 (2.3%) 1 (0.9%) 2 (0.8%)
James Cook University 1 (1.2%) 0 (0%) 0 (0%) 1 (0.4%)
La Trobe University 0 (0%) 1 (2.3%) 2 (1.7%) 3 (1.2%)
Macquarie University 0 (0%) 2 (4.7%) 4 (3.4%) 6 (2.4%)
Menzies School of Health Research 0 (0%) 1 (2.3%) 1 (0.9%) 2 (0.8%)
Monash University 13 (15.1%) 5 (11.6%) 20 (17.1%) 38 (15.4%)
Murdoch Childrens Research Institute 1 (1.2%) 1 (2.3%) 1 (0.9%) 3 (1.2%)
QIMR Berghofer Medical Research Institute 2 (2.3%) 1 (2.3%) 6 (5.1%) 9 (3.7%)
Queensland University of Technology 2 (2.3%) 0 (0%) 0 (0%) 2 (0.8%)
South Australian Health and Medical Research Institute 1 (1.2%) 0 (0%) 1 (0.9%) 2 (0.8%)
University of Adelaide 2 (2.3%) 1 (2.3%) 3 (2.6%) 6 (2.4%)
University of Melbourne 15 (17.4%) 9 (20.9%) 18 (15.4%) 42 (17.1%)
University of New South Wales 9 (10.5%) 4 (9.3%) 15 (12.8%) 28 (11.4%)
University of Newcastle 1 (1.2%) 3 (7.0%) 1 (0.9%) 5 (2.0%)
University of Queensland 12 (14.0%) 0 (0%) 8 (6.8%) 20 (8.1%)
University of South Australia 0 (0%) 0 (0%) 1 (0.9%) 1 (0.4%)
University of Sydney 5 (5.8%) 3 (7.0%) 9 (7.7%) 17 (6.9%)
University of Tasmania 1 (1.2%) 2 (4.7%) 0 (0%) 3 (1.2%)
University of Technology Sydney 1 (1.2%) 0 (0%) 1 (0.9%) 2 (0.8%)
University of Western Australia 1 (1.2%) 2 (4.7%) 5 (4.3%) 8 (3.3%)
University of Wollongong 2 (2.3%) 0 (0%) 2 (1.7%) 4 (1.6%)
Walter and Eliza Hall Institute of Medical Research 5 (5.8%) 2 (4.7%) 10 (8.5%) 17 (6.9%)

Analysis of H index of leadership awardees

# Descriptive summary (DescTools::Desc) of the raw H index for the
# Leadership subset. NAs are counted in the report rather than dropped
# beforehand. The printed title is taken from the expression passed in,
# so keep the `leaders$Hindex` form if the output label matters.
Desc(leaders$Hindex)
## ------------------------------------------------------------------------- 
## leaders$Hindex (integer)
## 
##   length      n    NAs  unique     0s   mean  meanCI
##      117     65     52      43      0  55.97   50.97
##           55.6%  44.4%           0.0%          60.96
##                                                     
##      .05    .10    .25  median    .75    .90     .95
##    34.20  37.00  42.00   50.00  63.00  83.40   89.60
##                                                     
##    range     sd  vcoef     mad    IQR   skew    kurt
##   103.00  20.16   0.36   14.83  21.00   1.44    2.32
##                                                     
## lowest : 22, 32, 33, 34, 35 (2)
## highest: 88, 90, 114, 118, 125

# Same descriptive summary for the AdjHindex column of the Leadership subset
# (presumably an adjusted H index; the adjustment itself is not defined in
# this file -- TODO confirm against the data dictionary).
Desc(leaders$AdjHindex)
## ------------------------------------------------------------------------- 
## leaders$AdjHindex (numeric)
## 
##   length      n    NAs  unique     0s   mean  meanCI
##      117     65     52      39      0  23.14   20.76
##           55.6%  44.4%           0.0%          25.51
##                                                     
##      .05    .10    .25  median    .75    .90     .95
##    13.00  13.94  18.00   20.60  25.00  33.60   44.40
##                                                     
##    range     sd  vcoef     mad    IQR   skew    kurt
##    49.50   9.58   0.41    6.52   7.00   1.73    3.25
##                                                     
## lowest : 11.5, 13.0 (5), 13.9, 14.0, 14.8
## highest: 41.0, 42.0, 45.0 (2), 50.0, 61.0

# Per-type medians of the raw and adjusted H index, ignoring missing values.
# NOTE: the column is called grp.mean for historical reasons but holds a
# median. `leaders` contains a single Type, so each frame has one row.
mu1 <- ddply(leaders, "Type", summarise,
             grp.mean = median(Hindex, na.rm = TRUE))
mu2 <- ddply(leaders, "Type", summarise,
             grp.mean = median(AdjHindex, na.rm = TRUE))

# Histogram of the raw H index with a dashed red line at the median.
# `colour` is set as a fixed parameter OUTSIDE aes(): inside aes() the string
# "red" is treated as a one-level discrete variable and drawn in the default
# scale colour instead of literal red.
p <- ggplot(data = leaders, aes(x = Hindex))
p <- p + geom_histogram(fill = "blue", col = "white")
p1 <- p +
  geom_vline(data = mu1, aes(xintercept = grp.mean),
             colour = "red", linetype = "dashed") +
  ylab("Count") +
  theme_economist() +
  theme(legend.position = "none")

# Same plot for the adjusted H index
p <- ggplot(data = leaders, aes(x = AdjHindex))
p <- p + geom_histogram(fill = "green", col = "white")
p2 <- p +
  geom_vline(data = mu2, aes(xintercept = grp.mean),
             colour = "red", linetype = "dashed") +
  ylab("Count") +
  theme_economist() +
  theme(legend.position = "none")

# Show the two histograms side by side
grid.arrange(p1, p2, ncol = 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 52 rows containing non-finite values (stat_bin).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 52 rows containing non-finite values (stat_bin).

H index by area of research

# Box plots of raw and adjusted H index by research area, with the individual
# awardees overlaid as semi-transparent points.
#  * outlier.shape = NA: the jitter layer already draws every observation, so
#    letting the box plot draw its outliers too would plot those points twice.
#  * height = 0: jitter horizontally only; the default also adds random
#    vertical noise, which would displace points from their true H index.
p <- ggplot(data = leaders, aes(x = Area, y = Hindex, col = Area, fill = Area))
p1 <- p +
  geom_boxplot(col = "black", outlier.shape = NA) +
  geom_jitter(alpha = 0.6, height = 0) +
  theme_economist() +
  theme(legend.position = "none")

p <- ggplot(data = leaders,
            aes(x = Area, y = AdjHindex, col = Area, fill = Area))
p2 <- p +
  geom_boxplot(col = "black", outlier.shape = NA) +
  geom_jitter(alpha = 0.6, height = 0) +
  theme_economist() +
  theme(legend.position = "none")

# Show the two panels side by side
grid.arrange(p1, p2, ncol = 2)
## Warning: Removed 52 rows containing non-finite values (stat_boxplot).
## Warning: Removed 52 rows containing missing values (geom_point).
## Warning: Removed 52 rows containing non-finite values (stat_boxplot).
## Warning: Removed 52 rows containing missing values (geom_point).