Here is an attempt to draw a lollipop plot of NOTCH1-10 SNP data:
# Libraries
library(ggplot2)
library(dplyr) ## for piping
library(tidyr) ## for separate()
library(gridExtra) ## for grid.arrange()
library(grid) ## for grid.draw()
# Obtain data
# Read the NOTCH1-10 SNP table; the first line of the CSV holds the column names.
# NOTE: the path is absolute and machine-specific -- adjust it before running elsewhere.
d <- read.csv(file = "/Users/mi36288/RPCI/Divya/NOTCH1-10.csv", header = TRUE)
# `d` keeps the raw import; `df` is the data-frame copy used by the steps below.
df <- as.data.frame(d)
# Preview of the first rows (output as recorded).
head(df)
## ID segment chr start end type class VAF USMs S4
## 1 80 NOTCH1-10 9 139412301 139412400 NE intron 0.02128081 USM 7
## 2 81 NOTCH1-10 9 139412301 139412400 NE silent 0.01226590 NUSM 9
## 3 82 NOTCH1-10 9 139412301 139412400 NE silent 0.02441092 NUSM 11
## 4 83 NOTCH1-10 9 139412301 139412400 SE intron 0.01563667 USM 20
## 5 84 NOTCH1-10 9 139412301 139412400 NE missense 0.04596536 NUSM 27
## 6 85 NOTCH1-10 9 139412301 139412400 NE silent 0.02369065 USM 28
## mutations
## 1 9.139412393.G.A
## 2 9.139412322.C.G
## 3 9.139412376.G.A
## 4 9.139412392.G.A
## 5 9.139412381.G.T
## 6 9.139412382.G.A
# Rows x columns of the imported table.
dim(df)
## [1] 96 11
# Assemble data objects
# Step 1: keep the plotting annotations and split the dot-separated `mutations`
# string (e.g. "9.139412393.G.A") into chrom / location / Before / After.
SNPs0 <- df %>%
  select(mutations, type, class, USMs) %>%
  separate(
    col = mutations,
    into = c("chrom", "location", "Before", "After"),
    sep = "\\."
  )
head(SNPs0)
## chrom location Before After type class USMs
## 1 9 139412393 G A NE intron USM
## 2 9 139412322 C G NE silent NUSM
## 3 9 139412376 G A NE silent NUSM
## 4 9 139412392 G A SE intron USM
## 5 9 139412381 G T NE missense NUSM
## 6 9 139412382 G A NE silent USM
dim(SNPs0)
## [1] 96 7
# Step 2 (overwrites SNPs0): drop the two allele columns, turn the categorical
# columns into factors and the position into an integer.
SNPs0 <- SNPs0 %>%
  select(-Before, -After) %>%
  mutate(
    across(c(chrom, type, class, USMs), as.factor),
    location = as.integer(location)
  )
dim(SNPs0)
## [1] 96 5
names(SNPs0)
## [1] "chrom" "location" "type" "class" "USMs"
## Add frequency columns
# One row per event. Freq is the running index (1, 2, ...) of the event inside
# its (chrom, location, type) group, i.e. the height at which its point is drawn.
SNPs <- SNPs0 %>%
  group_by(chrom, location, type) %>% ## class+USMs
  # arrange() sorts the whole table here (groups are ignored unless
  # .by_group = TRUE); the order fixes which event gets which index.
  arrange(location, USMs, class) %>% ## sorting
  # row_number() instead of 1:n(): 1:n() would count down (1, 0) if n() were 0.
  mutate(Freq = row_number()) %>% ## indexing the events in each group
  ungroup()
dim(SNPs)
## [1] 96 6
levels(SNPs$USMs)
## [1] "NUSM" "USM"
# Only the level order is changed here; Freq has already been assigned above.
SNPs$USMs <- factor(SNPs$USMs, levels = c("USM", "NUSM")) # Flipping order
levels(SNPs$USMs)
## [1] "USM" "NUSM"
## Another data set with no repeats
# One row per (chrom, location, type) group, with Freq = number of events in
# that group. This table feeds the sticks (geom_segment) and the position
# labels (geom_text) of the lollipop plots.
#
# distinct() is given the grouping columns explicitly with .keep_all = TRUE.
# A bare distinct() compares ALL columns, so a group holding several class or
# USMs values kept several rows with the same Freq, and its stick and label
# were drawn more than once.
# NOTE(review): the dim()/head() output shown below was recorded with the
# earlier bare distinct(); the row count may be lower than 47 after this change.
SNPs2 <- SNPs0 %>%
  group_by(chrom, location, type) %>% ## class+USMs
  mutate(Freq = n()) %>% ## number of events in each group
  distinct(chrom, location, type, .keep_all = TRUE) %>% ## first row of each group
  ungroup() %>% ## To go back to the original data frame
  arrange(location) ## sorting
dim(SNPs2)
## [1] 47 6
head(SNPs2)
## # A tibble: 6 × 6
## chrom location type class USMs Freq
## <fct> <int> <fct> <fct> <fct> <int>
## 1 9 139412302 SE missense USM 1
## 2 9 139412302 NE missense USM 1
## 3 9 139412303 SE nonsense USM 1
## 4 9 139412309 SE missense USM 1
## 5 9 139412310 NE silent NUSM 2
## 6 9 139412315 SE missense NUSM 1
## Color palette for class variables
# Eight hex colours, handed to the manual fill and colour scales of both panels.
mypalette <- c(
  "#999999", "#E69F00", "#D55E00", "#009E73",
  "#CC79A7", "#56B4E9", "#0072B2", "#000000"
)
# Point shapes 21 and 22 (fillable circle and square), one per USMs level.
myshape <- c(21, 22)
## Lollipop plot using gridExtra
# Shared geometry of the two panels.
x0 <- 139412370 ## left end
x1 <- 139412400 ## right end
y0 <- -1 ## baseline
pointsize <- 3
## Top graph: events with type == "SE", lollipops rising from the baseline y0.
## SNPs (one row per event) supplies the points and the rug; SNPs2 (one row per
## position) supplies the sticks and the position labels.
p1 <- ggplot(dplyr::filter(SNPs, type == "SE")) +
  ## --- layers (drawn in this order) ---
  geom_segment(
    data = dplyr::filter(SNPs2, type == "SE"),
    mapping = aes(x = location, xend = location, y = y0, yend = Freq),
    colour = "grey"
  ) + ## sticks
  geom_point(
    mapping = aes(x = location, y = Freq, fill = class, shape = USMs),
    size = pointsize,
    colour = "black"
  ) + ## one point per event
  geom_rug(
    mapping = aes(x = location, y = Freq, colour = class),
    sides = "b",
    show.legend = FALSE
  ) + ## track
  geom_text(
    data = dplyr::filter(SNPs2, type == "SE"),
    mapping = aes(x = location, y = Freq + 0.5, label = location, angle = 90),
    hjust = 0,
    vjust = 0.5
  ) + ## annotation
  geom_hline(yintercept = y0) + ## baseline, drawn last so it sits on top
  ## --- scales; drop = FALSE keeps unused factor levels in the scales ---
  scale_shape_manual(values = myshape, drop = FALSE) + ## for shapes
  scale_fill_manual(values = mypalette, drop = FALSE) + ## for fills
  scale_colour_manual(values = mypalette, drop = FALSE) + ## for colors
  scale_x_continuous(limits = c(x0, x1)) +
  scale_y_continuous(limits = c(y0, 18)) +
  ## --- legends: only the fill (class) legend is kept ---
  guides(
    size = "none",
    shape = "none",
    colour = "none",
    fill = guide_legend(override.aes = list(shape = 21, size = 3, stroke = NA))
  ) +
  facet_grid(type ~ .) +
  labs(x = "", y = "") +
  ## --- appearance ---
  theme_light() + ## for white background
  theme(
    panel.grid.major.x = element_blank(),
    panel.border = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.x = element_text(angle = 0, vjust = 0.5, hjust = 0.5),
    ## negative bottom margin pulls this panel towards the one below
    plot.margin = margin(t = 3, r = 0, b = -5, l = 0)
  )
## Bottom graph: events with type == "NE". The y axis is reversed so the
## lollipops hang down from the baseline y0, mirroring the top graph.
p2 <- ggplot(dplyr::filter(SNPs, type == "NE")) +
  ## --- layers (drawn in this order) ---
  geom_segment(
    data = dplyr::filter(SNPs2, type == "NE"),
    mapping = aes(x = location, xend = location, y = y0, yend = Freq),
    colour = "grey"
  ) + ## sticks
  geom_point(
    mapping = aes(x = location, y = Freq, fill = class, shape = USMs),
    size = pointsize,
    colour = "black"
  ) + ## one point per event
  geom_rug(
    mapping = aes(x = location, y = Freq, colour = class),
    sides = "t",
    show.legend = FALSE
  ) + ## track
  geom_text(
    data = dplyr::filter(SNPs2, type == "NE"),
    mapping = aes(x = location, y = Freq + 0.5, label = location, angle = 270),
    hjust = 0,
    vjust = 0.5
  ) + ## annotation
  geom_hline(yintercept = y0) + ## baseline, drawn last so it sits on top
  ## --- scales; drop = FALSE keeps unused factor levels in the scales ---
  scale_shape_manual(values = myshape, drop = FALSE) + ## for shapes
  scale_fill_manual(values = mypalette, drop = FALSE) + ## for fills
  scale_colour_manual(values = mypalette, drop = FALSE) + ## for colors
  scale_x_continuous(limits = c(x0, x1)) +
  scale_y_reverse(limits = c(18, y0)) +
  ## --- legends: only the shape (USMs) legend is kept ---
  guides(
    size = "none",
    fill = "none",
    colour = "none",
    shape = guide_legend(override.aes = list(size = 3))
  ) +
  facet_grid(type ~ .) +
  labs(x = "", y = "") +
  ## --- appearance ---
  theme_light() + ## for white background
  theme(
    panel.grid.major.x = element_blank(),
    panel.border = element_blank(),
    axis.ticks.x = element_blank(),
    ## x labels are hidden on this panel; to show them use
    ## element_text(angle=0, vjust=0.5, hjust=0.5) instead
    axis.text.x = element_blank(),
    ## negative top margin pulls this panel towards the one above
    plot.margin = margin(t = -3, r = 0, b = 2, l = 0)
  )
## Version 1: stack the two panels with gridExtra.
grid.arrange(p1, p2, ncol = 1, nrow = 2)
## Version 2: for equalized margins.
## Convert both plots to gtables and stack them with rbind(); size = "first"
## takes the column widths from the first table (p1).
## From here on p1 and p2 hold grobs, not ggplot objects.
p1 <- ggplotGrob(p1)
p2 <- ggplotGrob(p2)
## grid.draw() does not clear the device, so start a new page first; otherwise
## this figure is painted on top of the grid.arrange() figure above.
grid.newpage()
grid.draw(rbind(p1, p2, size = "first"))