Hellen Butungi, Judith Alawa, Wangwe Ibrahim, Francis Mwatuni, Dawit Kidanemariam, Triza Tonui, Fatma Hussein, Donwilliams Otieno, Job Areba
5th December 2015
Human RNA-seq data were assembled with two assemblers, Oases and SOAPdenovo-Trans. We want to compare the quality of the two assemblies.
# 1. Read each assembler's CSV table and label its rows with the assembler
#    name, so the origin of every row is still known after stacking.
oases <- read.csv('./oases.csv')
oases[['assembler']] <- 'oases'
soap <- read.csv('./soapdenovotrans.csv')
soap[['assembler']] <- 'soap'
# Stack the two labelled tables into one merged data frame.
assemblies <- do.call(rbind, list(oases, soap))
# Total number of distinct (non-NA) hits found by either assembler.
# unique(as.character(...)) is used instead of levels(): levels() returns NULL
# when `hits` is a character column (the read.csv() default since R 4.0), which
# would silently report 0 hits. This form gives the same count whether `hits`
# is a factor or a character vector.
total_hits <- length(
  unique(as.character(assemblies[!is.na(assemblies$hits), 'hits']))
)
print(total_hits)
[1] 15949
# Number of distinct (non-NA) hits found by each assembler on its own.
# unique(as.character(...)) replaces levels(), which returns NULL (count 0)
# when read.csv() yields `hits` as character rather than factor (R >= 4.0).
hits_each <- data.frame(
  oases = length(unique(as.character(oases[!is.na(oases$hits), 'hits']))),
  soap = length(unique(as.character(soap[!is.na(soap$hits), 'hits'])))
)
print(hits_each)
oases soap
1 14141 11841
# Distinct (non-NA) hits per assembler, as sorted character vectors.
# sort(unique(as.character(...))) reproduces what levels() returns for a
# factor column, but also works when read.csv() yields `hits` as character
# (R >= 4.0), where levels() would return NULL and both setdiff() results
# below would be empty.
oases_hits <- sort(unique(as.character(oases[!is.na(oases$hits), 'hits'])))
soap_hits <- sort(unique(as.character(soap[!is.na(soap$hits), 'hits'])))
# Hits found by one assembler but not by the other.
unique_hits <- data.frame(
  oases = length(setdiff(oases_hits, soap_hits)),
  soap = length(setdiff(soap_hits, oases_hits))
)
print(unique_hits)
oases soap
1 4108 1808
# Non-missing reference coverage values for each assembler.
oases_coverage <- oases$reference_coverage[!is.na(oases$reference_coverage)]
soap_coverage <- soap$reference_coverage[!is.na(soap$reference_coverage)]
# One-row summary table holding the mean coverage per assembler.
average_coverage <- data.frame(
  calculation = 'mean',
  oases = mean(oases_coverage),
  soap = mean(soap_coverage)
)
print(average_coverage)
calculation oases soap
1 mean 0.6581914 0.6473458
Mean coverage multiplied by proportion of reference transcripts found