library(tumgr) ## For the example dataset
## Warning: package 'tumgr' was built under R version 3.4.4
## Loading required package: minpack.lm
## Warning: package 'minpack.lm' was built under R version 3.4.4
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 3.4.4
## Loading required package: magrittr
# Introduction:
# The article explains how spider plots are used to display tumor activity. Data from patients with tumors who received a drug and from patients in a control group are displayed over time. The authors explain why a spider plot displays this kind of data most accurately. The tumgr package is installed to provide the data for this example. Patients were randomly assigned to a treatment arm, and a follow-up time of 240 days was set in order to create the display. Original post: https://www.r-bloggers.com/visualization-of-tumor-response-spider-plots/
# Analysis:
# This article explores the use of spider plots as a way of expressing the change in a tumor's size over a period of time. It uses percentage change from baseline as a way to normalize the data: since every patient has a different sized tumor, raw tumor sizes cannot be compared directly. The "baseline" refers to the patient's tumor size at the time it was found. Each data point represents that patient's tumor size at the time of their check-in. Connecting all the data points for each patient creates an individual timeline. The graph is color coded to distinguish which patients were part of the control group and which were taking the drug.
# For the purposes of the project, several lines were commented out so that an image file was not created.
## Fix the RNG seed so the random treatment assignment is reproducible.
set.seed(1234)
tumorgrowth <- sampleData
## Derive the per-subject analysis variables. by() splits the data by subject
## name, the function below augments each subject's rows, and rbind() stacks
## the pieces back into a single data frame.
tumorgrowth <- do.call(rbind,
  by(tumorgrowth, tumorgrowth$name,
    function(subject) {
      ## Sort whole rows by visit date. Reordering `date` and `size` one
      ## column at a time would leave any other per-row column misaligned
      ## and would leak the ordering index into the result as a column.
      subject <- subject[order(subject$date), , drop = FALSE]
      within(subject, {
        ## subjects are randomly placed in control or drug treatment arms
        ## (a single draw per subject, recycled over all of its rows)
        treatment <- if (rbinom(1, 1, 0.5) == 1) "Drug" else "Control"
        ## the earliest measurement is the reference size
        baseline <- size[1]
        percentChange <- 100 * (size - baseline) / baseline
        ## data censored at 240 days
        time <- ifelse(date > 240, 240, date)
        ## 0 = censored (visit after day 240), 1 = observed; the levels are
        ## fixed so every subject carries the same factor levels
        cstatus <- factor(ifelse(date > 240, 0, 1), levels = c(0, 1))
      })
    }))
## Drop the row names generated by rbind().
rownames(tumorgrowth) <- NULL
## Save plot in file
# png(filename = "C:\\Path\\To\\SpiderPlot\\SpiderPlot.png", width = 640, height = 640)
## Plot settings: time on x, percent change from baseline on y, and one
## trajectory per subject (group = name).
p <- ggplot(tumorgrowth, aes(x = time, y = percentChange, group = name)) +
  theme_bw(base_size = 14) +
  theme(
    axis.title.x = element_text(face = "bold"),
    axis.text.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold"),
    axis.text.y = element_text(face = "bold"),
    plot.title = element_text(size = 18, hjust = 0.5)
  ) +
  ## Pass the labels as named arguments. Wrapping them in list() depends on
  ## labs() unpacking a list, which newer ggplot2 releases do not reliably do.
  labs(
    title = "Spider Plot",
    x = "Time (in days)",
    y = "Change from baseline (%)"
  )
## Now plot
p <- p +
  geom_line(aes(color = treatment)) +
  geom_point(aes(shape = cstatus, color = treatment), show.legend = FALSE) +
  ## Legend labels come from the data values ("Control" / "Drug"). Positional
  ## labels could mislabel the legend if an arm were missing.
  scale_colour_discrete(name = "Treatment") +
  ## cstatus "0" (censored) is drawn with shape 3, "1" with shape 16
  scale_shape_manual(name = "cstatus", values = c("0" = 3, "1" = 16)) +
  ## zoom the x axis to the follow-up window without dropping data
  coord_cartesian(xlim = c(0, 240))
# print(p)
# dev.off()
p

# Follow-up:
# At first, we found the data to be a bit cluttered, and therefore separated the two types of treatment, by "Control" and "Drug". In this graph, we were able to see the side by side data, still in a spider plot format. It displayed the individual patients and how their tumors progressed, whether with the drug or as a control. To look at the data differently, we made a histogram. This graph displays the tumor growth per check-up, for both treatment types. The growth amount was compared to the baseline measurement.
# By showing both the spider plot and a histogram of tumor progress, the viewer can examine both the progression of individual patients based on their treatment, and the general progression of all the tumors over the course of their check-ins. This allows us to examine overall pattern differences between the control and the treatment. This will allow for an additional comparison for more in depth exploration of the differences between the treatment and control.
## Rebuild the analysis data set from scratch for the follow-up plots. The
## seed is reset so the random arm assignment can be reproduced.
set.seed(1234)
tumorgrowth <- sampleData
## Split by subject, add the derived columns to each subject's rows, then
## stack the per-subject pieces back together.
tumorgrowth <- do.call(rbind,
  by(tumorgrowth, tumorgrowth$name,
    function(subject) {
      ## Order complete rows by visit date so that all columns stay aligned
      ## and no temporary ordering index ends up as a column in the result.
      subject <- subject[order(subject$date), , drop = FALSE]
      within(subject, {
        ## subjects are randomly placed in control or drug treatment arms
        ## (one draw per subject, recycled across its rows)
        treatment <- if (rbinom(1, 1, 0.5) == 1) "Drug" else "Control"
        ## first (earliest) measurement serves as the baseline size
        baseline <- size[1]
        percentChange <- 100 * (size - baseline) / baseline
        ## data censored at 240 days
        time <- ifelse(date > 240, 240, date)
        ## 0 = censored (visit after day 240), 1 = observed; explicit levels
        ## keep the factor identical across subjects
        cstatus <- factor(ifelse(date > 240, 0, 1), levels = c(0, 1))
      })
    }))
## Discard the row names that rbind() generated.
rownames(tumorgrowth) <- NULL
## Plot settings: same spider-plot axes as before (time vs. percent change,
## one trajectory per subject), later split into one panel per treatment arm.
Np <- ggplot(tumorgrowth, aes(x = time, y = percentChange, group = name)) +
  theme_bw(base_size = 14) +
  theme(
    axis.title.x = element_text(face = "bold"),
    axis.text.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold"),
    axis.text.y = element_text(face = "bold"),
    plot.title = element_text(size = 18, hjust = 0.5)
  ) +
  ## Named arguments instead of labs(list(...)): newer ggplot2 releases do
  ## not reliably unpack a list passed to labs().
  labs(x = "Time (in days)", y = "Baseline change (%)") # + geom_smooth()
## Now plot
Np <- Np +
  geom_line(aes(color = treatment)) +
  geom_point(aes(color = treatment), show.legend = FALSE) +
  ## Legend labels are taken from the data values ("Control" / "Drug").
  scale_colour_discrete(name = "Treatment") +
  ## No layer maps `shape` in this plot, so the manual shape scale that was
  ## here had no effect and has been dropped.
  coord_cartesian(xlim = c(0, 240)) +
  ## one panel per treatment arm, side by side
  facet_grid(~treatment)
Np

## Histogram of percent change from baseline over all visits, filled by
## treatment arm and split into one panel per arm.
hi <- ggplot(tumorgrowth, aes(x = percentChange, fill = treatment)) +
  geom_histogram(binwidth = 19) +
  facet_grid(~ treatment) +
  theme_bw(base_size = 14) +
  theme(
    axis.title.x = element_text(face = "bold"),
    axis.text.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold"),
    axis.text.y = element_text(face = "bold"),
    plot.title = element_text(size = 18, hjust = 0.5)
  ) +
  labs(x = "Percent Change", y = "Count", fill = "Treatment")
## Stack the faceted spider plot above the histogram in a single figure.
ggarrange(Np, hi, ncol = 1, nrow = 2)

# Discussion:
# Instead of viewing each patient over time, we are able to observe the tumors' sizes visit by visit. In this format, it is clear that there was much less tumor growth in patients who were using the drug. Although a spider plot is a good way to show tumor measurement over time (in relation to the baseline), it may not be the best way to show that the drug is more effective.
# This activity required us to think about data in a different way from the original display. Although the data was the same, we made a graph that looked completely different from the original. In order to find a new way to display this data, we had to imagine it from a different view. By looking at percentage change visit by visit, we were able to see how much the drug was working. By showing each percentage change per visit, separated by control and drug patients, it is much easier to see that the drug is working.
# If we were to complete this lab again, we would stick with the spider plot format. The purpose of the article was to show that a spider plot is a great way to display data similar to tumor growth. In order to change a variable, we could display the original size of the tumors, instead of a "baseline". If the drug was not as effective on tumors that were large when discovered, this change to the graph would display that. Not only would this type of graph show how effectively the drug works, it would also show how much of an impact it has on tumors of various sizes.