# Heavily penalise scientific notation so numbers print in fixed notation.
options(scipen = 999)
library(table1, quietly=TRUE)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
library(DescTools, quietly=TRUE)
library(ggplot2, quietly=TRUE)
library(gridExtra, quietly=TRUE)
library(ggExtra, quietly=TRUE)
library(plyr, quietly=T)
library(dplyr, quietly=T)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggthemes, quietly=T)
# Read the 2019 Investigator Grant outcomes CSV
dat <- read.csv(
  "~/Dropbox/NHMRC/NHMRC 2019/Investigator Grant/2019 Investigator Outcomes.csv"
)

# Split the records by grant type: the two EL levels vs. Leadership
el <- subset(dat, Type %in% c("EL1", "EL2"))
leaders <- subset(dat, Type == "Leadership")
# Grant counts per type, drawn as a labelled bar chart
df <- summarise(group_by(dat, Type), counts = n())

# Bars take their height directly from `counts`; each bar is labelled with its
# count just above the top edge
p <- ggplot(data = df, aes(x = Type, y = counts)) +
  geom_bar(stat = "identity", fill = "#0073C2FF") +
  geom_text(aes(label = counts), vjust = -0.3)

# Axis titles are added only to the printed plot; `p` itself stays unlabelled
p + xlab("Type") + ylab("Count")
# Institution breakdown: one column per grant type plus an overall column
table1(~ Institution | Type, data = dat)
| | EL1 (n=86) | EL2 (n=43) | Leadership (n=117) | Overall (n=246) |
|---|---|---|---|---|
| Institution | | | | |
| Australian National University | 3 (3.5%) | 2 (4.7%) | 0 (0%) | 5 (2.0%) |
| Baker Heart and Diabetes Institute | 1 (1.2%) | 0 (0%) | 2 (1.7%) | 3 (1.2%) |
| Bond University | 0 (0%) | 0 (0%) | 1 (0.9%) | 1 (0.4%) |
| Burnet Institute | 1 (1.2%) | 0 (0%) | 1 (0.9%) | 2 (0.8%) |
| Centre for Eye Research Australia | 1 (1.2%) | 0 (0%) | 1 (0.9%) | 2 (0.8%) |
| Curtin University | 2 (2.3%) | 2 (4.7%) | 1 (0.9%) | 5 (2.0%) |
| Deakin University | 2 (2.3%) | 1 (2.3%) | 2 (1.7%) | 5 (2.0%) |
| Edith Cowan University | 1 (1.2%) | 0 (0%) | 0 (0%) | 1 (0.4%) |
| Flinders University | 1 (1.2%) | 0 (0%) | 0 (0%) | 1 (0.4%) |
| Griffith University | 0 (0%) | 1 (2.3%) | 1 (0.9%) | 2 (0.8%) |
| James Cook University | 1 (1.2%) | 0 (0%) | 0 (0%) | 1 (0.4%) |
| La Trobe University | 0 (0%) | 1 (2.3%) | 2 (1.7%) | 3 (1.2%) |
| Macquarie University | 0 (0%) | 2 (4.7%) | 4 (3.4%) | 6 (2.4%) |
| Menzies School of Health Research | 0 (0%) | 1 (2.3%) | 1 (0.9%) | 2 (0.8%) |
| Monash University | 13 (15.1%) | 5 (11.6%) | 20 (17.1%) | 38 (15.4%) |
| Murdoch Childrens Research Institute | 1 (1.2%) | 1 (2.3%) | 1 (0.9%) | 3 (1.2%) |
| QIMR Berghofer Medical Research Institute | 2 (2.3%) | 1 (2.3%) | 6 (5.1%) | 9 (3.7%) |
| Queensland University of Technology | 2 (2.3%) | 0 (0%) | 0 (0%) | 2 (0.8%) |
| South Australian Health and Medical Research Institute | 1 (1.2%) | 0 (0%) | 1 (0.9%) | 2 (0.8%) |
| University of Adelaide | 2 (2.3%) | 1 (2.3%) | 3 (2.6%) | 6 (2.4%) |
| University of Melbourne | 15 (17.4%) | 9 (20.9%) | 18 (15.4%) | 42 (17.1%) |
| University of New South Wales | 9 (10.5%) | 4 (9.3%) | 15 (12.8%) | 28 (11.4%) |
| University of Newcastle | 1 (1.2%) | 3 (7.0%) | 1 (0.9%) | 5 (2.0%) |
| University of Queensland | 12 (14.0%) | 0 (0%) | 8 (6.8%) | 20 (8.1%) |
| University of South Australia | 0 (0%) | 0 (0%) | 1 (0.9%) | 1 (0.4%) |
| University of Sydney | 5 (5.8%) | 3 (7.0%) | 9 (7.7%) | 17 (6.9%) |
| University of Tasmania | 1 (1.2%) | 2 (4.7%) | 0 (0%) | 3 (1.2%) |
| University of Technology Sydney | 1 (1.2%) | 0 (0%) | 1 (0.9%) | 2 (0.8%) |
| University of Western Australia | 1 (1.2%) | 2 (4.7%) | 5 (4.3%) | 8 (3.3%) |
| University of Wollongong | 2 (2.3%) | 0 (0%) | 2 (1.7%) | 4 (1.6%) |
| Walter and Eliza Hall Institute of Medical Research | 5 (5.8%) | 2 (4.7%) | 10 (8.5%) | 17 (6.9%) |
# Summary of H index
# Descriptive statistics for the Hindex column of the Leadership subset.
# The argument is kept as `leaders$Hindex` because Desc() prints the argument
# expression as the heading of its output.
Desc(leaders$Hindex)
## -------------------------------------------------------------------------
## leaders$Hindex (integer)
##
## length n NAs unique 0s mean meanCI
## 117 65 52 43 0 55.97 50.97
## 55.6% 44.4% 0.0% 60.96
##
## .05 .10 .25 median .75 .90 .95
## 34.20 37.00 42.00 50.00 63.00 83.40 89.60
##
## range sd vcoef mad IQR skew kurt
## 103.00 20.16 0.36 14.83 21.00 1.44 2.32
##
## lowest : 22, 32, 33, 34, 35 (2)
## highest: 88, 90, 114, 118, 125
# Descriptive statistics for the AdjHindex column of the Leadership subset.
# The argument is kept as `leaders$AdjHindex` because Desc() prints the
# argument expression as the heading of its output.
Desc(leaders$AdjHindex)
## -------------------------------------------------------------------------
## leaders$AdjHindex (numeric)
##
## length n NAs unique 0s mean meanCI
## 117 65 52 39 0 23.14 20.76
## 55.6% 44.4% 0.0% 25.51
##
## .05 .10 .25 median .75 .90 .95
## 13.00 13.94 18.00 20.60 25.00 33.60 44.40
##
## range sd vcoef mad IQR skew kurt
## 49.50 9.58 0.41 6.52 7.00 1.73 3.25
##
## lowest : 11.5, 13.0 (5), 13.9, 14.0, 14.8
## highest: 41.0, 42.0, 45.0 (2), 50.0, 61.0
# Median Hindex and AdjHindex of the Leadership subset, used as the reference
# lines on the histograms below. The column keeps the name `grp.mean` although
# it holds a median. na.rm = TRUE skips the missing index values.
mu1 <- ddply(
  leaders, "Type", summarise,
  grp.mean = median(Hindex, na.rm = TRUE)
)
mu2 <- ddply(
  leaders, "Type", summarise,
  grp.mean = median(AdjHindex, na.rm = TRUE)
)

# Histogram of Hindex with a dashed line at the median.
# `color = "red"` is set outside aes(): inside aes() it would be mapped as a
# one-level discrete variable and drawn in the default scale colour, not red.
p <- ggplot(data = leaders, aes(x = Hindex))
p <- p + geom_histogram(fill = "blue", col = "white")
p1 <- p +
  geom_vline(
    data = mu1, aes(xintercept = grp.mean),
    color = "red", linetype = "dashed"
  ) +
  ylab("Count") +
  theme_economist() +
  theme(legend.position = "none")

# Histogram of AdjHindex, built the same way.
p <- ggplot(data = leaders, aes(x = AdjHindex))
p <- p + geom_histogram(fill = "green", col = "white")
p2 <- p +
  geom_vline(
    data = mu2, aes(xintercept = grp.mean),
    color = "red", linetype = "dashed"
  ) +
  ylab("Count") +
  theme_economist() +
  theme(legend.position = "none")

# Show both histograms side by side
grid.arrange(p1, p2, ncol = 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 52 rows containing non-finite values (stat_bin).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 52 rows containing non-finite values (stat_bin).
# Boxplots of Hindex and AdjHindex by Area, with the individual observations
# overlaid as semi-transparent jittered points.
# - outlier.shape = NA: the jitter layer already draws every observation, so
#   the boxplot's own outlier markers would plot those points a second time.
# - height = 0: jitter horizontally only, so each point stays at its true
#   index value on the y axis.
p <- ggplot(data = leaders, aes(x = Area, y = Hindex, col = Area, fill = Area))
p1 <- p +
  geom_boxplot(col = "black", outlier.shape = NA) +
  geom_jitter(alpha = 0.6, height = 0) +
  theme_economist() +
  theme(legend.position = "none")

p <- ggplot(
  data = leaders,
  aes(x = Area, y = AdjHindex, col = Area, fill = Area)
)
p2 <- p +
  geom_boxplot(col = "black", outlier.shape = NA) +
  geom_jitter(alpha = 0.6, height = 0) +
  theme_economist() +
  theme(legend.position = "none")

# Show both boxplots side by side
grid.arrange(p1, p2, ncol = 2)
## Warning: Removed 52 rows containing non-finite values (stat_boxplot).
## Warning: Removed 52 rows containing missing values (geom_point).
## Warning: Removed 52 rows containing non-finite values (stat_boxplot).
## Warning: Removed 52 rows containing missing values (geom_point).