Finding the right the parameters for assembling transcripts is important. Ultimately, you want to make sure you are capturing biological complexity while filtering potential program errors. Here, I am analyzing the output from HISAT2 mapped reads that have been assembled by StringTie.
Load in necessary packages
library(dplyr)
library(ggvenn)
library(tidyverse)
library(ggpubr)
library(data.table)
library(UpSetR)
library(ComplexHeatmap)Read in the .tmap file. The .tmap file contains genomic inforamtion about the transcripts that have been identified. Also, read in a file that contains a list of all antisense transcripts that have been identified in each set of parameters
#default
default_tmap <- read_delim("/mnt/Kanta/antisense_lncRNAs/hisat2_mapping/AT/at_tissue_atlas_trial/default_assemble/gffcompare/gffcompare.default_assemble.gtf.tmap",
delim = "\t", escape_double = FALSE,
trim_ws = TRUE)
default_as <- read_delim("/mnt/Kanta/antisense_lncRNAs/hisat2_mapping/AT/at_tissue_atlas_trial/default_assemble/gffcompare/CPC2_output/default_antisense_tx_names.txt",
delim = "\t", escape_double = FALSE,
col_names = FALSE, trim_ws = TRUE)
#test1
test1_tmap <- read_delim("/mnt/Kanta/antisense_lncRNAs/hisat2_mapping/AT/at_tissue_atlas_trial/test1_assemble/gffcompare/gffcompare.test1_assemble.gtf.tmap",
delim = "\t", escape_double = FALSE,
trim_ws = TRUE)
test1_as <- read_delim("/mnt/Kanta/antisense_lncRNAs/hisat2_mapping/AT/at_tissue_atlas_trial/test1_assemble/gffcompare/CPC2_output/test1_antisense_tx_names.txt",
delim = "\t", escape_double = FALSE,
col_names = FALSE, trim_ws = TRUE)
#test2
test2_tmap <- read_delim("/mnt/Kanta/antisense_lncRNAs/hisat2_mapping/AT/at_tissue_atlas_trial/test2_assemble/gffcompare/gffcompare.test2_assemble.gtf.tmap",
delim = "\t", escape_double = FALSE,
trim_ws = TRUE)
test2_as <- read_delim("/mnt/Kanta/antisense_lncRNAs/hisat2_mapping/AT/at_tissue_atlas_trial/test2_assemble/gffcompare/CPC2_output/test2_antisense_tx_names.txt",
delim = "\t", escape_double = FALSE,
col_names = FALSE, trim_ws = TRUE)
#test3
test3_tmap <- read_delim("/mnt/Kanta/antisense_lncRNAs/hisat2_mapping/AT/at_tissue_atlas_trial/test3_assemble/gffcompare/gffcompare.test3_assemble.gtf.tmap",
delim = "\t", escape_double = FALSE,
trim_ws = TRUE)
test3_as <- read_delim("/mnt/Kanta/antisense_lncRNAs/hisat2_mapping/AT/at_tissue_atlas_trial/test3_assemble/gffcompare/CPC2_output/test3_antisense_tx_names.txt",
delim = "\t", escape_double = FALSE,
col_names = FALSE, trim_ws = TRUE)
#test4
test4_tmap <- read_delim("/mnt/Kanta/antisense_lncRNAs/hisat2_mapping/AT/at_tissue_atlas_trial/test4_assemble/gffcompare/gffcompare.at_tissue_atlas_test4.gtf.tmap",
delim = "\t", escape_double = FALSE,
trim_ws = TRUE)
test4_as <- read_delim("/mnt/Kanta/antisense_lncRNAs/hisat2_mapping/AT/at_tissue_atlas_trial/test4_assemble/gffcompare/CPC2_output/test4_antisense_tx_names.txt",
delim = "\t", escape_double = FALSE,
col_names = FALSE, trim_ws = TRUE)Lets clean up these tmap files and extract generate a file that contains the antisense/pcg pair from each annotation
#default
default_tmap_filt <- dplyr::filter(default_tmap, class_code == 'x') #extract all antisense transcripts
default_tmap_filt<- default_tmap_filt[, c(5,4,2,1,6,10,11,12)] #filter for only the columns of interest and reorder for downstream analyses
default_tmap_filt %>%
right_join(default_as, by = c("qry_id" = "X1")) -> default_tmap_filt #grab genomic information for antisense transcripts
default_tmap_filt$ref_id <- gsub("transcript:","", as.character(default_tmap_filt$ref_id)) #clean up column information
default_tmap_filt$ref_gene_id <- gsub("gene:","", as.character(default_tmap_filt$ref_gene_id)) #clean up column information
default_gene_pairs <- default_tmap_filt[, c(1,2,3,4)] #generate a file that contains gene and transcript information for the pcg and antisense
default_gene_pairs$ref_id <-gsub("transcript:","", as.character(default_gene_pairs$ref_id)) #clean up column information
default_gene_pairs$ref_gene_id <-gsub("gene:","", as.character(default_gene_pairs$ref_gene_id)) #clean up column information
default_gene_pairs <- default_gene_pairs[, c(2,4)] %>% distinct() #filter for redundancy in the gene pair file
#test1
test1_tmap_filt <- dplyr::filter(test1_tmap, class_code == 'x')
test1_tmap_filt<- test1_tmap_filt[, c(5,4,2,1,6,10,11,12)]
test1_tmap_filt %>%
right_join(test1_as, by = c("qry_id" = "X1")) -> test1_tmap_filt
test1_tmap_filt$ref_id <- gsub("transcript:","", as.character(test1_tmap_filt$ref_id))
test1_tmap_filt$ref_gene_id <- gsub("gene:","", as.character(test1_tmap_filt$ref_gene_id))
test1_gene_pairs <- test1_tmap_filt[, c(1,2,3,4)]
test1_gene_pairs$ref_id <-gsub("transcript:","", as.character(test1_gene_pairs$ref_id))
test1_gene_pairs$ref_gene_id <-gsub("gene:","", as.character(test1_gene_pairs$ref_gene_id))
test1_gene_pairs <- test1_gene_pairs[, c(2,4)] %>% distinct()
#test2
test2_tmap_filt <- dplyr::filter(test2_tmap, class_code == 'x')
test2_tmap_filt<- test2_tmap_filt[, c(5,4,2,1,6,10,11,12)]
test2_tmap_filt %>%
right_join(test2_as, by = c("qry_id" = "X1")) -> test2_tmap_filt
test2_tmap_filt$ref_id <- gsub("transcript:","", as.character(test2_tmap_filt$ref_id))
test2_tmap_filt$ref_gene_id <- gsub("gene:","", as.character(test2_tmap_filt$ref_gene_id))
test2_gene_pairs <- test2_tmap_filt[, c(1,2,3,4)]
test2_gene_pairs$ref_id <-gsub("transcript:","", as.character(test2_gene_pairs$ref_id))
test2_gene_pairs$ref_gene_id <-gsub("gene:","", as.character(test2_gene_pairs$ref_gene_id))
test2_gene_pairs <- test2_gene_pairs[, c(2,4)] %>% distinct()
#test3
test3_tmap_filt <- dplyr::filter(test3_tmap, class_code == 'x')
test3_tmap_filt<- test3_tmap_filt[, c(5,4,2,1,6,10,11,12)]
test3_tmap_filt %>%
right_join(test3_as, by = c("qry_id" = "X1")) -> test3_tmap_filt
test3_tmap_filt$ref_id <- gsub("transcript:","", as.character(test3_tmap_filt$ref_id))
test3_tmap_filt$ref_gene_id <- gsub("gene:","", as.character(test3_tmap_filt$ref_gene_id))
test3_gene_pairs <- test3_tmap_filt[, c(1,2,3,4)]
test3_gene_pairs$ref_id <-gsub("transcript:","", as.character(test3_gene_pairs$ref_id))
test3_gene_pairs$ref_gene_id <-gsub("gene:","", as.character(test3_gene_pairs$ref_gene_id))
test3_gene_pairs <- test3_gene_pairs[, c(2,4)] %>% distinct()
#test4
test4_tmap_filt <- dplyr::filter(test4_tmap, class_code == 'x')
test4_tmap_filt<- test4_tmap_filt[, c(5,4,2,1,6,10,11,12)]
test4_tmap_filt %>%
right_join(test4_as, by = c("qry_id" = "X1")) -> test4_tmap_filt
test4_tmap_filt$ref_id <- gsub("transcript:","", as.character(test4_tmap_filt$ref_id))
test4_tmap_filt$ref_gene_id <- gsub("gene:","", as.character(test4_tmap_filt$ref_gene_id))
test4_gene_pairs <- test4_tmap_filt[, c(1,2,3,4)]
test4_gene_pairs$ref_id <-gsub("transcript:","", as.character(test4_gene_pairs$ref_id))
test4_gene_pairs$ref_gene_id <-gsub("gene:","", as.character(test4_gene_pairs$ref_gene_id))
test4_gene_pairs <- test4_gene_pairs[, c(2,4)] %>% distinct()Grab the genes that are in each intersection
default_pcgs_2 <- default_pcgs_df %>% dplyr::mutate(type="default") %>% dplyr::rename(gene=1)
test1_pcgs_2 <- test1_pcgs_df %>% dplyr::mutate(type="test1") %>% dplyr::rename(gene=1)
test2_pcgs_2 <- test2_pcgs_df %>% dplyr::mutate(type="test2") %>% dplyr::rename(gene=1)
test3_pcgs_2 <- test3_pcgs_df %>% dplyr::mutate(type="test3") %>% dplyr::rename(gene=1)
test4_pcgs_2 <- test4_pcgs_df %>% dplyr::mutate(type="test4") %>% dplyr::rename(gene=1)
all <- rbind(default_pcgs_2,
test1_pcgs_2,
test2_pcgs_2,
test3_pcgs_2,
test4_pcgs_2)
all_2 <- data.table::dcast(setDT(all), gene ~ type, fun.aggregate = length)
all_2 %>% as.data.frame %>% dplyr::filter(., default==1 & test1==1 & test2==1 & test3==1 & test4==1) %>% distinct(gene) -> present_all
all_2 %>% as.data.frame %>% dplyr::filter(., default==1 & test1==0 & test2==0 & test3==0 & test4==0) %>% distinct(gene) -> unique_default
all_2 %>% as.data.frame %>% dplyr::filter(., default==0 & test1==0 & test2==0 & test3==0 & test4==1) %>% distinct(gene) -> unique_test4
all_2 %>% as.data.frame %>% dplyr::filter(.,test3==1 & test4==0) %>% distinct(gene) -> present_t3_not_t4Let’s pull some information from the tmap file about the pcg/antisense pairs that are unique to the default assembly and test4 assembly
default_unique_vs_t4_tmap <- default_tmap_filt
default_unique_vs_t4_tmap %>%
right_join(., unique_default, by=c("ref_gene_id" = "gene")) -> default_unique_vs_t4_tmap
test4_unique_vs_default_tmap <- test4_tmap_filt
test4_unique_vs_default_tmap %>%
right_join(., unique_test4, by=c("ref_gene_id" = "gene")) -> test4_unique_vs_default_tmap
present_t3_not_t4_tmap <- test3_tmap_filt
present_t3_not_t4_tmap %>%
right_join(., present_t3_not_t4, by=c("ref_gene_id" = "gene")) -> present_t3_not_t4_tmapHow many exons do they have?
hist(default_unique_vs_t4_tmap$num_exons)hist(test4_unique_vs_default_tmap$num_exons)default_num_exon <- ggplot(default_tmap_filt, aes(x=num_exons)) +
geom_bar(stat = "count", fill = "#5499C7")+
scale_x_continuous(labels=as.character(default_tmap_filt$num_exons), breaks = default_tmap_filt$num_exons)+
labs(x= "Number of exons of default antisense transcripts", y= "Transcript count")+
geom_text(aes(label = after_stat(count)), stat = "count", vjust = -0.5)+
theme_minimal(base_size = 12)
test1_num_exon <- ggplot(test1_tmap_filt, aes(x=num_exons)) +
geom_bar(stat = "count", fill = "#2980B9")+
scale_x_continuous(labels=as.character(test1_tmap_filt$num_exons), breaks = test1_tmap_filt$num_exons)+
labs(x= "Number of exons of test 1 antisense transcripts", y= "Transcript count")+
geom_text(aes(label = after_stat(count)), stat = "count", vjust = -0.5)+
theme_minimal(base_size = 12)
test2_num_exon <- ggplot(test2_tmap_filt, aes(x=num_exons)) +
geom_bar(stat = "count", fill = "#2471A3")+
scale_x_continuous(labels=as.character(test2_tmap_filt$num_exons), breaks = test2_tmap_filt$num_exons)+
labs(x= "Number of exons of test 2 antisense transcripts", y= "Transcript count")+
geom_text(aes(label = after_stat(count)), stat = "count", vjust = -0.5)+
theme_minimal(base_size = 12)
test3_num_exon <- ggplot(test3_tmap_filt, aes(x=num_exons)) +
geom_bar(stat = "count", fill = "#1F618D")+
scale_x_continuous(labels=as.character(test3_tmap_filt$num_exons), breaks = test3_tmap_filt$num_exons)+
labs(x= "Number of exons of test 3 antisense transcripts", y= "Transcript count")+
geom_text(aes(label = after_stat(count)), stat = "count", vjust = -0.5)+
theme_minimal(base_size = 12)
test4_num_exon <- ggplot(test4_tmap_filt, aes(x=num_exons)) +
geom_bar(stat = "count", fill = "#1A5276")+
scale_x_continuous(labels=as.character(test4_tmap_filt$num_exons), breaks = test4_tmap_filt$num_exons)+
labs(x= "Number of exons of test 4 antisense transcripts", y= "Transcript count")+
geom_text(aes(label = after_stat(count)), stat = "count", vjust = -0.5)+
theme_minimal(base_size = 12)num_exon_def_unq <- ggplot(default_unique_vs_t4_tmap, aes(x=num_exons, y=num_exons)) +
geom_bar(stat = "identity", fill = "#707B7C")+
scale_x_continuous(labels=as.character(default_unique_vs_t4_tmap$num_exons), breaks = default_unique_vs_t4_tmap$num_exons)+
labs(x= "number of exons", y= "transcript count")+
theme_minimal(base_size = 16)
num_exon_t4_unq <- ggplot(test4_unique_vs_default_tmap, aes(x=num_exons, y=num_exons)) +
geom_bar(stat = "identity", fill = "#34495E")+
scale_x_continuous(labels=as.character(test4_unique_vs_default_tmap$num_exons),breaks=c(test4_unique_vs_default_tmap$num_exons))+
labs(x= "number of exons", y= "transcript count")+
geom_text(aes(label = num_exons), vjust = 0)+
theme_minimal(base_size = 16)What is the distribution of antisense/pcg loci orientation? How many of the unique default and test3 transcripts fall into each class of the gene orientations?
Read in ccivr outout and tmap file
default_cc <- read_csv("/mnt/Kanta/antisense_lncRNAs/hisat2_mapping/AT/at_tissue_atlas_trial/default_assemble/gffcompare/CPC2_output/ccivr/ccivr_output/Table.csv")
test1_cc <- read_csv("/mnt/Kanta/antisense_lncRNAs/hisat2_mapping/AT/at_tissue_atlas_trial/test1_assemble/gffcompare/CPC2_output/ccivr/ccivr_output/Table.csv")
test2_cc <- read_csv("/mnt/Kanta/antisense_lncRNAs/hisat2_mapping/AT/at_tissue_atlas_trial/test2_assemble/gffcompare/CPC2_output/ccivr/ccivr_output/Table.csv")
test3_cc <- read_csv("/mnt/Kanta/antisense_lncRNAs/hisat2_mapping/AT/at_tissue_atlas_trial/test3_assemble/gffcompare/CPC2_output/ccivr/ccivr_output/Table.csv")
test4_cc <- read_csv("/mnt/Kanta/antisense_lncRNAs/hisat2_mapping/AT/at_tissue_atlas_trial/test4_assemble/gffcompare/CPC2_output/ccivr/ccivr_output/Table.csv")#lets combine the tmap file ccivr output file
default_cc_tmap <- default_tmap %>%
dplyr::select(ref_gene_id,
ref_id,
qry_gene_id,
qry_id,
ref_match_len,
num_exons)
test1_cc_tmap <- test1_tmap %>%
dplyr::select(ref_gene_id,
ref_id,
qry_gene_id,
qry_id,
ref_match_len,
num_exons)
test2_cc_tmap <- test2_tmap %>%
dplyr::select(ref_gene_id,
ref_id,
qry_gene_id,
qry_id,
ref_match_len,
num_exons)
test3_cc_tmap <- test3_tmap %>%
dplyr::select(ref_gene_id,
ref_id,
qry_gene_id,
qry_id,
ref_match_len,
num_exons)
test4_cc_tmap <- test4_tmap %>%
dplyr::select(ref_gene_id,
ref_id,
qry_gene_id,
qry_id,
ref_match_len,
num_exons)#clean up the columns
default_cc_tmap$ref_gene_id <- gsub('gene:', '', default_cc_tmap$ref_gene_id)
default_cc_tmap$ref_id <- gsub('transcript:', '', default_cc_tmap$ref_id)
default_cc_tmap$qry_gene_id <- gsub('gene:', '', default_cc_tmap$qry_gene_id)
default_cc_tmap$qry_id <- gsub('transcript:', '', default_cc_tmap$qry_id)
test1_cc_tmap$ref_gene_id <- gsub('gene:', '', test1_cc_tmap$ref_gene_id)
test1_cc_tmap$ref_id <- gsub('transcript:', '', test1_cc_tmap$ref_id)
test1_cc_tmap$qry_gene_id <- gsub('gene:', '', test1_cc_tmap$qry_gene_id)
test1_cc_tmap$qry_id <- gsub('transcript:', '', test1_cc_tmap$qry_id)
test2_cc_tmap$ref_gene_id <- gsub('gene:', '', test2_cc_tmap$ref_gene_id)
test2_cc_tmap$ref_id <- gsub('transcript:', '', test2_cc_tmap$ref_id)
test2_cc_tmap$qry_gene_id <- gsub('gene:', '', test2_cc_tmap$qry_gene_id)
test2_cc_tmap$qry_id <- gsub('transcript:', '', test2_cc_tmap$qry_id)
test3_cc_tmap$ref_gene_id <- gsub('gene:', '', test3_cc_tmap$ref_gene_id)
test3_cc_tmap$ref_id <- gsub('transcript:', '', test3_cc_tmap$ref_id)
test3_cc_tmap$qry_gene_id <- gsub('gene:', '', test3_cc_tmap$qry_gene_id)
test3_cc_tmap$qry_id <- gsub('transcript:', '', test3_cc_tmap$qry_id)
test4_cc_tmap$ref_gene_id <- gsub('gene:', '', test4_cc_tmap$ref_gene_id)
test4_cc_tmap$ref_id <- gsub('transcript:', '', test4_cc_tmap$ref_id)
test4_cc_tmap$qry_gene_id <- gsub('gene:', '', test4_cc_tmap$qry_gene_id)
test4_cc_tmap$qry_id <- gsub('transcript:', '', test4_cc_tmap$qry_id)Join the ccivr output and clean tmap file based up sense and antisense
default_j <- left_join(default_cc_tmap,
default_cc,
by = c("ref_id" = "id",
"qry_id" = "_id")) %>%
na.omit() %>%
distinct()
test1_j <- left_join(test1_cc_tmap,
test1_cc,
by = c("ref_id" = "id",
"qry_id" = "_id")) %>%
na.omit() %>%
distinct()
test2_j <- left_join(test2_cc_tmap,
test2_cc,
by = c("ref_id" = "id",
"qry_id" = "_id")) %>%
na.omit() %>%
distinct()
test3_j <- left_join(test3_cc_tmap,
test3_cc,
by = c("ref_id" = "id",
"qry_id" = "_id")) %>%
na.omit() %>%
distinct()
test4_j <- left_join(test4_cc_tmap,
test4_cc,
by = c("ref_id" = "id",
"qry_id" = "_id")) %>%
na.omit() %>%
distinct()Generate a ccivr output table of the identified NATs
default_nat <- left_join(default_as, default_j,
by = c("X1" = "qry_id")) %>%
na.omit() %>%
distinct()
default_nat %>%
distinct(ref_gene_id) -> default_unique_pcg_cc_output
test1_nat <- left_join(test1_as, test1_j,
by = c("X1" = "qry_id")) %>%
distinct()%>%
na.omit()
test1_nat %>%
distinct(ref_gene_id) -> test1_unique_pcg_cc_output
test2_nat <- left_join(test2_as, test2_j,
by = c("X1" = "qry_id")) %>%
distinct()%>%
na.omit()
test2_nat %>%
distinct(ref_gene_id) -> test2_unique_pcg_cc_output
test3_nat <- left_join(test3_as, test3_j,
by = c("X1" = "qry_id")) %>%
distinct()%>%
na.omit()
test3_nat %>%
distinct(ref_gene_id) -> test3_unique_pcg_cc_output
test4_nat <- left_join(test4_as, test4_j,
by = c("X1" = "qry_id")) %>%
distinct()%>%
na.omit()
test4_nat %>%
distinct(ref_gene_id) -> test4_unique_pcg_cc_output
#determine the frequency of each class code
default_type_sum <- default_nat %>%
group_by(Type) %>%
summarise(n = n()) %>%
mutate(freq = (n / sum(n)) * 100,
trial = "default")
test1_type_sum <- test1_nat %>%
group_by(Type) %>%
summarise(n = n()) %>%
mutate(freq = (n / sum(n)) * 100,
trial = "test1")
test2_type_sum <- test2_nat %>%
group_by(Type) %>%
summarise(n = n()) %>%
mutate(freq = (n / sum(n)) * 100,
trial = "test2")
test3_type_sum <- test3_nat %>%
group_by(Type) %>%
summarise(n = n()) %>%
mutate(freq = (n / sum(n)) * 100,
trial = "test3")
test4_type_sum <- test4_nat %>%
group_by(Type) %>%
summarise(n = n()) %>%
mutate(freq = (n / sum(n)) * 100,
trial = "test4")all_test_com <- do.call("rbind", list(default_type_sum,
test1_type_sum,
test2_type_sum,
test3_type_sum,
test4_type_sum)) #lets combine all of the summary tables for each species into one data frame
level_order <- c('default', 'test1', 'test2', 'test3', 'test4') #specify the order in which you want the species to be ordered in the plot
all_test_com %>%
ggplot(., aes(fill = Type, x = factor(trial, level = level_order), y = freq)) +
geom_bar(stat = "identity", alpha = .8) +
ggsci::scale_fill_aaas() +
ylab('Proportion of cis-NATs') +
xlab('Assemebly Parameters') +
geom_text(aes(label=paste0(sprintf("%1.1f", freq),"%")),
position=position_stack(vjust=0.5),
size = 6) +
theme_classic(base_size = 22) -> all_test_com_graphShow the proportion of genes that have isoforms
default_as_tx <- default_tmap_filt[, c(1,2)]
write.csv(default_as_tx, "/mnt/Kanta/antisense_lncRNAs/hisat2_mapping/AT/at_tissue_atlas_trial/interim_files/default_as_tx.csv")
default_as_gen <- default_tmap_filt[, c(1,2)] %>% distinct(qry_gene_id)
test1_as_tx <- test1_tmap_filt[, c(1,2)]
write.csv(test1_as_tx, "//mnt/Kanta/antisense_lncRNAs/hisat2_mapping/AT/at_tissue_atlas_trial/interim_files/test1_as_tx.csv")
test1_as_gen <- test1_tmap_filt[, c(1,2)] %>% distinct(qry_gene_id)
test2_as_tx <- test2_tmap_filt[, c(1,2)]
write.csv(test2_as_tx, "/mnt/Kanta/antisense_lncRNAs/hisat2_mapping/AT/at_tissue_atlas_trial/interim_files/test2_as_tx.csv")
test2_as_gen <- test2_tmap_filt[, c(1,2)] %>% distinct(qry_gene_id)
test3_as_tx <- test3_tmap_filt[, c(1,2)]
write.csv(test3_as_tx, "/mnt/Kanta/antisense_lncRNAs/hisat2_mapping/AT/at_tissue_atlas_trial/interim_files/test3_as_tx.csv")
test3_as_gen <- test3_tmap_filt[, c(1,2)] %>% distinct(qry_gene_id)
test4_as_tx <- test4_tmap_filt[, c(1,2)]
write.csv(test4_as_tx, "/mnt/Kanta/antisense_lncRNAs/hisat2_mapping/AT/at_tissue_atlas_trial/interim_files/test4_as_tx.csv")
test4_as_gen <- test4_tmap_filt[, c(1,2)] %>% distinct(qry_gene_id)isoform_gene <- read_delim("/mnt/Kanta/antisense_lncRNAs/hisat2_mapping/sup_files/isoform_gene.txt",
delim = "\t", escape_double = FALSE,
trim_ws = TRUE)## Rows: 10 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): Type, trial
## dbl (2): n, freq
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
level_order <- c('default', 'test1', 'test2', 'test3', 'test4')
isoform_gene %>%
ggplot(., aes(fill = Type, x = factor(trial, level = level_order), y = freq)) +
geom_bar(stat = "identity", alpha = .7) +
ggsci::scale_fill_aaas() +
ylab('Proportion of cis-NATs') +
xlab('Assemebly Parameters') +
geom_text(aes(label=paste0(sprintf("%1.1f", freq),"%")),
position=position_stack(vjust=0.5),
size = 6) +
theme_classic(base_size = 18) -> iso_freq_allTrue Default and Test4 comparisons
default_unique_ori <- default_unique_vs_t4_tmap[, c(4,3,2,1,5,6,7,8)]
default_unique_ori_com <- left_join(default_unique_ori, default_j,
by = c("ref_gene_id" = "ref_gene_id")) %>% distinct(ref_gene_id, qry_gene_id.x, Type)## Warning in left_join(default_unique_ori, default_j, by = c(ref_gene_id = "ref_gene_id")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 17 of `x` matches multiple rows in `y`.
## ℹ Row 149 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.