This accompanies my piece for the Head Quarters blog on The Guardian website: https://www.theguardian.com/science/head-quarters/2017/feb/23/autism-diagnosis-by-brain-scan-its-time-for-a-reality-check
The code plots data from Hazlett et (2017)’s study of brain growth in babies at high familial risk of autism.
In the first part I replot Figure 1 from the paper, which shows longitudinal growth curves. Animating the data shows more clearly the differences and overlap between babies who do vs do not receive an autism diagnosis.
The second part uses waffle plots to represent the outcome of machine learning techniques applied to the brain data to predict diagnostic outcome.
library(openxlsx)
library(ggplot2)
library(grid)
library(magick)
library(knitr)
library(waffle)
Underlying data for the plot were included in the online version of the paper. Read in excel file. Exclude the “low risk” group (coded as 0). Rename the two remaining groups.
Data <- read.xlsx("data/nature21369-f1.xlsx")
Data <- Data[(Data$DSM3G!=0),]
Data$DSM3G <- as.factor(Data$DSM3G)
levels(Data$DSM3G)[levels(Data$DSM3G)=="1"] <- "Not autistic"
levels(Data$DSM3G)[levels(Data$DSM3G)=="2"] <- "Autistic"
Data$TimePoint <- rep(c(6,12,24),length(unique(Data$rCandID)))
PlotFig <- function(Data, DV) {
myPlot <- ggplot(data=Data, aes_string(x = "Length_Age", y = DV, group = "rCandID", colour = "DSM3G")) +
geom_freqpoly(stat = "identity", size = I(.3)) +
scale_x_continuous("Age / months",limits=c(0,42),breaks=seq(0, 40, 10)) +
theme_bw() +
theme(text=element_text(size=14), plot.title = element_text(hjust = 0.5, size=18), legend.justification=c(1,0), legend.position=c(0.9,0.05), legend.title=element_blank(), legend.text = element_text(size = 14), axis.line = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank()) +
scale_colour_manual(values=c("grey", "red"), breaks=c("Autistic","Not autistic"))
myPlot
}
This function produces a three-panel figure showing total brain volume, surface are, and cortical depth. Individual plots are created in ggplot2 and combined using grid.arrange from gridExtra.
PlotPanelFig <- function(Data, FName = "Image.png") {
TBV <- PlotFig(Data=Data, DV="TBV") +
ggtitle("Total brain volume") +
scale_y_continuous(limits=c(0,1300000),breaks=seq(0, 1400000, 200000)) +
ylab(bquote('Volume / '*mm^3*''))
SA <- PlotFig(Data=Data, DV="SAAll") +
ggtitle("Cortical surface area") +
scale_y_continuous(limits=c(0,83000),breaks=seq(0, 80000, 10000)) +
ylab(bquote('Area / '*mm^2*''))
CT <- PlotFig(Data=Data, DV="CTAll") +
ggtitle("Cortical thickness") +
scale_y_continuous(limits=c(0,7),breaks=seq(0, 7, 1)) +
ylab("Thickness / mm")
# NB This method of combining plots ensures that all the plot areas are equal in size
plots <- list(TBV, SA, CT)
grobs <- lapply(plots, ggplotGrob)
g <- do.call(gridExtra::cbind.gtable, grobs)
png(filename=paste0("output/figures/",FName), width=1000, height=400)
grid.draw(g)
dev.off()
}
Save the two frames as PNG files. Frame 1 includes just the data from the babies who were not diagnosed with autism. Frame 2 includes both groups.
PlotPanelFig (Data[(Data$DSM3G=="Not autistic"),], FName="Frame1.png")
## quartz_off_screen
## 2
knitr::include_graphics("output/figures/Frame1.png")
PlotPanelFig (Data, FName="Frame2.png")
## quartz_off_screen
## 2
knitr::include_graphics("output/figures/Frame2.png")
Combine the two frames as a GIF. This requires the “magick” R package.
Frames <- c(image_read("output/figures/Frame1.png"), image_read("output/figures/Frame2.png"))
Animation <- image_animate(image_scale(Frames, "1000x400"), fps = 0.5, dispose = "previous")
image_write(Animation, "output/figures/BrainGrowth.gif")
knitr::include_graphics("output/figures/BrainGrowth.gif")
Data are taken from Extended Data Table 4 and page 12 of the Supplementary Information
df <- data.frame(DiagnosticGroup = c("High Risk Autism", "High Risk Negative", "Low Risk"),
SampleSize = c(70, 248, 117),
TestPositive = c(30, 7, 8),
TestNegative = c(4, 138, 76))
df$Incomplete <- df$SampleSize - df$TestPositive - df$TestNegative
kable(df, row.names=FALSE, caption="Frequencies of classification outcome", digits=0)
| DiagnosticGroup | SampleSize | TestPositive | TestNegative | Incomplete |
|---|---|---|---|---|
| High Risk Autism | 70 | 30 | 4 | 36 |
| High Risk Negative | 248 | 7 | 138 | 103 |
| Low Risk | 117 | 8 | 76 | 33 |
Create separate waffle plots for babies who do versus do not receive an autism diagnosis using data as presented in the paper. Combine using the “iron” function and output as PNG file.
ASD_babies <- c(`Positive`=df$TestPositive[1], `Negative`=df$TestNegative[1])
ASD_waffle <- waffle(ASD_babies, rows=10, size=0.5,
colors=c("dark red", "red"),
title="Autistic babies",
pad=11) +
theme(legend.text = element_text(size = 14))
Neg_babies <- c(`Positive`=df$TestPositive[2], `Negative`=df$TestNegative[2])
Neg_waffle <- waffle(Neg_babies, rows=10, size=0.5,
colors=c("gray45", "gray70"),
xlab="1 square = 1 baby",
title="Non-autistic babies") +
theme(legend.text = element_text(size = 14))
png(filename="output/figures/Waffle1.png", width=1000, height=400)
iron(ASD_waffle, Neg_waffle)
dev.off()
## quartz_off_screen
## 2
knitr::include_graphics("output/figures/Waffle1.png")
A large number of babies were excluded due incomplete scan data. Add this information to the two waffle plots.
ASD_babies <- c(`Positive`=df$TestPositive[1], `Negative`=df$TestNegative[1], `Incomplete`=df$Incomplete[1])
ASD_waffle <- waffle(ASD_babies, rows=10, size=0.5,
colors=c("dark red", "red", "pink"),
title="Autistic babies",
pad=18) +
theme(legend.text = element_text(size = 14))
Neg_babies <- c(`Positive`=df$TestPositive[2], `Negative`=df$TestNegative[2], `Incomplete`=df$Incomplete[2])
Neg_babies <- c(`Positive`=7, `Negative`=138, `Incomplete`=103)
Neg_waffle <- waffle(Neg_babies, rows=10, size=0.5,
colors=c("gray45", "gray70", "light grey"),
xlab="1 square = 1 baby",
title="Non-autistic babies") +
theme(legend.text = element_text(size = 14))
png(filename="output/figures/Waffle2.png", width=1000, height=400)
iron(ASD_waffle, Neg_waffle)
dev.off()
## quartz_off_screen
## 2
knitr::include_graphics("output/figures/Waffle2.png")
If the scanning protocol is extended to the general population then the base rate needs to be taken into account. Scale up the sample size for the Non-autistic group to reflect the 1 in 68 prevalence of autism. Update the waffle plot.
OldSS <- df$SampleSize[2]
ScaledSS <- 67 * df$SampleSize[1]
ScalingFactor <- ScaledSS / OldSS
df$SampleSize[2] <- ScaledSS
df$TestPositive[2] <- round(df$TestPositive[2] * ScalingFactor)
df$TestNegative[2] <- round(df$TestNegative[2] * ScalingFactor)
df$Incomplete[2] <- round(df$Incomplete[2] * ScalingFactor)
kable(df[(df$DiagnosticGroup!="Low Risk"),], row.names=FALSE, caption="Frequencies of classification outcome", digits=0)
| DiagnosticGroup | SampleSize | TestPositive | TestNegative | Incomplete |
|---|---|---|---|---|
| High Risk Autism | 70 | 30 | 4 | 36 |
| High Risk Negative | 4690 | 132 | 2610 | 1948 |
NegRows <- 44
NegColumns <- ceiling(df$SampleSize[2] / NegRows)
ASD_babies <- c(`Positive`=df$TestPositive[1], `Negative`=df$TestNegative[1], `Incomplete`=df$Incomplete[1])
ASD_waffle <- waffle(ASD_babies, rows=10, size=1,
colors=c("dark red", "red", "pink"),
title="Autistic babies",
pad=NegColumns-7) +
theme(legend.text = element_text(size = 14))
Neg_babies <- c(`Positive`=df$TestPositive[2], `Negative`=df$TestNegative[2], `Incomplete`=df$Incomplete[2])
Neg_waffle <- waffle(Neg_babies, rows=NegRows, size=1,
colors=c("gray45", "gray70", "light grey"),
xlab="1 square = 1 baby",
title="Non-autistic babies") +
theme(legend.text = element_text(size = 14))
png(filename="output/figures/Waffle3.png", width=1000, height=600)
iron(ASD_waffle, Neg_waffle)
dev.off()
## quartz_off_screen
## 2
knitr::include_graphics("output/figures/Waffle3.png")
Heather Cody Hazlett, et al “Early Brain Development in Infants at High Risk for Autism Spectrum Disorder” Nature, in print Feb. 16, 2017. http://doi.org/10.1038/nature21369