Example: NOTCH1-10

Here is an attempt to draw a lollipop plot of NOTCH1-10 SNP data:

# Libraries
library(ggplot2)
library(dplyr)  ## for piping
library(tidyr)  ## for separate()
library(gridExtra)  ## for grid.arrange()
library(grid) ## for grid.draw()
# Obtain data
csv_path <- "/Users/mi36288/RPCI/Divya/NOTCH1-10.csv"  ## input table, first row is the header
d <- read.csv(file = csv_path, header = TRUE)
df <- data.frame(d)  ## data frame used by the steps below
head(df)  ## preview of the first rows
##   ID   segment chr     start       end type    class        VAF USMs S4
## 1 80 NOTCH1-10   9 139412301 139412400   NE   intron 0.02128081  USM  7
## 2 81 NOTCH1-10   9 139412301 139412400   NE   silent 0.01226590 NUSM  9
## 3 82 NOTCH1-10   9 139412301 139412400   NE   silent 0.02441092 NUSM 11
## 4 83 NOTCH1-10   9 139412301 139412400   SE   intron 0.01563667  USM 20
## 5 84 NOTCH1-10   9 139412301 139412400   NE missense 0.04596536 NUSM 27
## 6 85 NOTCH1-10   9 139412301 139412400   NE   silent 0.02369065  USM 28
##         mutations
## 1 9.139412393.G.A
## 2 9.139412322.C.G
## 3 9.139412376.G.A
## 4 9.139412392.G.A
## 5 9.139412381.G.T
## 6 9.139412382.G.A
dim(df)  ## rows, columns
## [1] 96 11
# Assemble data objects
## Step 1: keep the mutation string plus its annotations, then split the
## dot-delimited string (e.g. "9.139412393.G.A") into four named fields.
mutation_fields <- c("chrom", "location", "Before", "After")
SNPs0 <- df %>%
  select(mutations, type, class, USMs) %>%
  separate(col = mutations, into = mutation_fields, sep = "\\.")
head(SNPs0)
##   chrom  location Before After type    class USMs
## 1     9 139412393      G     A   NE   intron  USM
## 2     9 139412322      C     G   NE   silent NUSM
## 3     9 139412376      G     A   NE   silent NUSM
## 4     9 139412392      G     A   SE   intron  USM
## 5     9 139412381      G     T   NE missense NUSM
## 6     9 139412382      G     A   NE   silent  USM
dim(SNPs0)
## [1] 96  7
## Step 2 (overwrite): discard the two allele columns and fix column types --
## location becomes an integer, the remaining columns become factors.
SNPs0 <- SNPs0 %>%
  select(-Before, -After) %>%
  mutate(
    chrom    = as.factor(chrom),
    location = as.integer(location),
    type     = as.factor(type),
    class    = as.factor(class),
    USMs     = as.factor(USMs)
  )
dim(SNPs0)
## [1] 96  5
names(SNPs0)
## [1] "chrom"    "location" "type"     "class"    "USMs"
## Add frequency columns
## One row per event. Rows are sorted first, then numbered 1, 2, 3, ... within
## each (chrom, location, type) group; that running index is stored in Freq.
SNPs <- SNPs0 %>%
  arrange(location, USMs, class) %>%  ## sorting (uses the USMs level order in effect here)
  group_by(chrom, location, type) %>%  ## class+USMs
  mutate(Freq = row_number()) %>%  ## indexing the events in each group;
                                   ## unlike 1:n(), this is also valid for empty input
  ungroup()
dim(SNPs)
## [1] 96  6
levels(SNPs$USMs)
## [1] "NUSM" "USM"
## The releveling happens after the sort above, so it changes only the factor
## level order seen by later code, not the row order or Freq values.
SNPs$USMs <- factor(SNPs$USMs, levels = c("USM", "NUSM"))  # Flipping order
levels(SNPs$USMs)
## [1] "USM"  "NUSM"
## Another data set with no repeats
## One row per (chrom, location, type) group, with Freq = number of events in
## that group. distinct() is restricted to the grouping columns: a bare
## distinct() compares every column, so a group whose events differ in class
## or USMs would keep several rows with the same location and Freq.
SNPs2 <- SNPs0 %>%
  group_by(chrom, location, type) %>%  ## class+USMs
  mutate(Freq = n()) %>%  ## number of events in each group
  distinct(chrom, location, type, .keep_all = TRUE) %>%  ## keep only the first row for each group
  ungroup() %>%  ## To go back to the original data frame
  arrange(location)  ## sorting
## NOTE: the outputs below were recorded with the earlier bare distinct();
## the row count can be lower now if any group had mixed class/USMs values.
dim(SNPs2)
## [1] 47  6
head(SNPs2)
## # A tibble: 6 × 6
##   chrom  location type  class    USMs   Freq
##   <fct>     <int> <fct> <fct>    <fct> <int>
## 1 9     139412302 SE    missense USM       1
## 2 9     139412302 NE    missense USM       1
## 3 9     139412303 SE    nonsense USM       1
## 4 9     139412309 SE    missense USM       1
## 5 9     139412310 NE    silent   NUSM      2
## 6 9     139412315 SE    missense NUSM      1
## Color palette for class variables
mypalette <- c(
  "#999999", "#E69F00", "#D55E00", "#009E73",
  "#CC79A7", "#56B4E9", "#0072B2", "#000000"
)
myshape <- c(21, 22)  ## values handed to scale_shape_manual()

## Lollipop plot using gridExtra
x0 <- 139412370  ## left end
x1 <- 139412400  ## right end
y0 <- -1  ## baseline
pointsize <- 3  ## size of the lollipop heads

## Top panel: rows with type "SE".
## se_events feeds the points and rug; se_sites feeds the sticks and labels.
se_events <- subset(SNPs, type == "SE", drop = FALSE)
se_sites <- subset(SNPs2, type == "SE", drop = FALSE)

p1 <- ggplot(se_events) +  ## top graph
  ## Layers, in drawing order
  geom_segment(
    data = se_sites,
    mapping = aes(x = location, xend = location, y = y0, yend = Freq),
    color = "grey"
  ) +  ## sticks
  geom_point(
    mapping = aes(x = location, y = Freq, fill = class, shape = USMs),
    size = pointsize,
    color = "black"
  ) +
  geom_rug(
    mapping = aes(x = location, y = Freq, color = class),
    show.legend = FALSE,
    sides = "b"
  ) +  ## track
  geom_text(
    data = se_sites,
    mapping = aes(x = location, y = Freq + 0.5, label = location, angle = 90),
    hjust = 0,
    vjust = 0.5
  ) +  ## annotation
  geom_hline(yintercept = y0) +  ## baseline rule
  ## Facet strip and scales
  facet_grid(rows = vars(type)) +
  scale_shape_manual(values = myshape, drop = FALSE) +  ## for shapes
  scale_fill_manual(values = mypalette, drop = FALSE) +  ## for fills
  scale_color_manual(values = mypalette, drop = FALSE) +  ## for colors
  xlim(x0, x1) +
  ylim(y0, 18) +
  ## Only the fill legend is kept in this panel
  guides(
    size = "none", shape = "none", color = "none",  ## remove some legends
    fill = guide_legend(override.aes = list(shape = 21, size = 3, stroke = NA))
  ) +
  xlab("") +
  ylab("") +
  ## Appearance
  theme_light() +  ## for white background
  theme(
    panel.grid.major.x = element_blank(),
    panel.border = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.x = element_text(angle = 0, vjust = 0.5, hjust = 0.5),
    plot.margin = margin(
      t = 3,   # Top margin
      r = 0,   # Right margin
      b = -5,  # Bottom margin
      l = 0    # Left margin
    )
  )
  
## Bottom panel: rows with type "NE", drawn on a reversed y axis.
## ne_events feeds the points and rug; ne_sites feeds the sticks and labels.
ne_events <- subset(SNPs, type == "NE", drop = FALSE)
ne_sites <- subset(SNPs2, type == "NE", drop = FALSE)

p2 <- ggplot(ne_events) +  ## bottom graph
  ## Layers, in drawing order
  geom_segment(
    data = ne_sites,
    mapping = aes(x = location, xend = location, y = y0, yend = Freq),
    color = "grey"
  ) +  ## sticks
  geom_point(
    mapping = aes(x = location, y = Freq, fill = class, shape = USMs),
    size = pointsize,
    color = "black"
  ) +
  geom_rug(
    mapping = aes(x = location, y = Freq, color = class),
    show.legend = FALSE,
    sides = "t"
  ) +  ## track
  geom_text(
    data = ne_sites,
    mapping = aes(x = location, y = Freq + 0.5, label = location, angle = 270),
    hjust = 0,
    vjust = 0.5
  ) +  ## annotation
  geom_hline(yintercept = y0) +  ## baseline rule
  ## Facet strip and scales
  facet_grid(rows = vars(type)) +
  scale_shape_manual(values = myshape, drop = FALSE) +  ## for shapes
  scale_fill_manual(values = mypalette, drop = FALSE) +  ## for fills
  scale_color_manual(values = mypalette, drop = FALSE) +  ## for colors
  xlim(x0, x1) +
  scale_y_reverse(limits = c(18, y0)) +  ## flipped y axis, same range as ylim(y0, 18)
  ## Only the shape legend is kept in this panel
  guides(
    size = "none", fill = "none", color = "none",  ## remove some legends
    shape = guide_legend(override.aes = list(size = 3))
  ) +
  xlab("") +
  ylab("") +
  ## Appearance
  theme_light() +  ## for white background
  theme(
    panel.grid.major.x = element_blank(),
    panel.border = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    #axis.text.x = element_text(angle=0, vjust=0.5, hjust=0.5),
    plot.margin = margin(
      t = -3,  # Top margin
      r = 0,   # Right margin
      b = 2,   # Bottom margin
      l = 0    # Left margin
    )
  )

## First rendering: stack the two panels with gridExtra
grid.arrange(p1, p2, ncol = 1, nrow = 2)

## For equalized margins: convert both plots to gtables and stack them with
## rbind(), taking the column widths from the first one (size = "first")
p1 <- ggplotGrob(p1)  ## NOTE: from here on p1/p2 hold gtables, not ggplot objects
p2 <- ggplotGrob(p2)
grid.newpage()  ## grid.draw() does not clear the page; without this the aligned
                ## figure is painted on top of the grid.arrange() output above
grid.draw(rbind(p1, p2, size = "first"))