library(ballgown); library(genefilter); library(dplyr); library(devtools); library(gplots); library(ggfortify)
Attaching package: 㤼㸱dplyr㤼㸲
The following objects are masked from 㤼㸱package:ballgown㤼㸲:
contains, expr, last
The following objects are masked from 㤼㸱package:stats㤼㸲:
filter, lag
The following objects are masked from 㤼㸱package:base㤼㸲:
intersect, setdiff, setequal, union
Attaching package: 㤼㸱gplots㤼㸲
The following object is masked from 㤼㸱package:stats㤼㸲:
lowess
Loading required package: ggplot2
Attaching package: 㤼㸱ggplot2㤼㸲
The following object is masked from 㤼㸱package:ballgown㤼㸲:
expr
-rw-rw-r– 1 ubuntu ubuntu 1460476 Mar 5 05:15 Sp_ds.gtf
-rw-rw-r– 1 ubuntu ubuntu 49218 Mar 5 05:15 e2t.ctab
-rw-rw-r– 1 ubuntu ubuntu 364883 Mar 5 05:15 e_data.ctab
-rw-rw-r– 1 ubuntu ubuntu 664 Mar 5 05:15 i2t.ctab
-rw-rw-r– 1 ubuntu ubuntu 2516 Mar 5 05:15 i_data.ctab
-rw-rw-r– 1 ubuntu ubuntu 407989 Mar 5 05:15 t_data.ctab
data_directory="C:/Users/bhagi/Documents/NGS_data_analysis/tuxedo2_S_pombe/ballgown"
data_directory
[1] "C:/Users/bhagi/Documents/NGS_data_analysis/tuxedo2_S_pombe/ballgown"
bg=ballgown(dataDir=data_directory, samplePattern="S", meas="all")
Sun Mar 24 13:34:53 2019
Sun Mar 24 13:34:53 2019: Reading linking tables
Sun Mar 24 13:34:53 2019: Reading intron data files
Sun Mar 24 13:34:53 2019: Merging intron data
Sun Mar 24 13:34:53 2019: Reading exon data files
Sun Mar 24 13:34:54 2019: Merging exon data
Sun Mar 24 13:34:54 2019: Reading transcript data files
Sun Mar 24 13:34:54 2019: Merging transcript data
Wrapping up the results
Sun Mar 24 13:34:54 2019
structure(bg)
$intron
GRanges object with 62 ranges and 2 metadata columns:
seqnames ranges strand | id transcripts
<Rle> <IRanges> <Rle> | <integer> <character>
[1] SPAC144.02_T0 143-190 + | 1 258
[2] SPAC1486.05_T0 2883-2947 + | 2 282
[3] SPAC1639.01c_T0 962-1078 + | 3 360
[4] SPAC17H9.14c_T0 556-623 + | 4 557
[5] SPAC1851.04c_T0 2590-2650 - | 5 595
... ... ... ... . ... ...
[58] SPBC577.11_T0 213-258 + | 58 3789
[59] SPBP35G2.04c_T0 165-248 + | 59 4078
[60] SPCC1235.01_T0 946-985 + | 60 4221
[61] SPCC417.04_T0 151-212 - | 61 4707
[62] SPCC663.01c_T0 2156-2230 + | 62 4890
-------
seqinfo: 36 sequences from an unspecified genome; no seqlengths
$exon
GRanges object with 5114 ranges and 2 metadata columns:
seqnames ranges strand | id transcripts
<Rle> <IRanges> <Rle> | <integer> <character>
[1] SPAC1002.01_T0 3-480 + | 1 1
[2] SPAC1002.02_T0 1-690 + | 2 2
[3] SPAC1002.03c_T0 1-2772 + | 3 3
[4] SPAC1002.04c_T0 1-600 + | 4 4
[5] SPAC1002.05c_T0 11-2134 + | 5 5
... ... ... ... . ... ...
[5110] SPCPB1C11.02_T0 1-1518 + | 5110 5055
[5111] SPCPB1C11.03_T0 1-1695 + | 5111 5056
[5112] SPCPJ732.01_T0 1-558 - | 5112 5057
[5113] SPCPJ732.01_T0 1-1731 + | 5113 5058
[5114] SPCPJ732.02c_T0 3-1666 + | 5114 5059
-------
seqinfo: 4640 sequences from an unspecified genome; no seqlengths
$trans
GRangesList object of length 5059:
$1
GRanges object with 1 range and 2 metadata columns:
seqnames ranges strand | id transcripts
<Rle> <IRanges> <Rle> | <integer> <character>
[1] SPAC1002.01_T0 3-480 + | 1 1
$2
GRanges object with 1 range and 2 metadata columns:
seqnames ranges strand | id transcripts
[1] SPAC1002.02_T0 1-690 + | 2 2
$3
GRanges object with 1 range and 2 metadata columns:
seqnames ranges strand | id transcripts
[1] SPAC1002.03c_T0 1-2772 + | 3 3
...
<5056 more elements>
-------
seqinfo: 4640 sequences from an unspecified genome; no seqlengths
pData(bg)=data.frame(id=sampleNames(bg), group=factor(c(1,1,2,2)))
names(structure(bg))
[1] "intron" "exon" "trans"
structure(bg)$exon
GRanges object with 5114 ranges and 2 metadata columns:
seqnames ranges strand | id transcripts
<Rle> <IRanges> <Rle> | <integer> <character>
[1] SPAC1002.01_T0 3-480 + | 1 1
[2] SPAC1002.02_T0 1-690 + | 2 2
[3] SPAC1002.03c_T0 1-2772 + | 3 3
[4] SPAC1002.04c_T0 1-600 + | 4 4
[5] SPAC1002.05c_T0 11-2134 + | 5 5
... ... ... ... . ... ...
[5110] SPCPB1C11.02_T0 1-1518 + | 5110 5055
[5111] SPCPB1C11.03_T0 1-1695 + | 5111 5056
[5112] SPCPJ732.01_T0 1-558 - | 5112 5057
[5113] SPCPJ732.01_T0 1-1731 + | 5113 5058
[5114] SPCPJ732.02c_T0 3-1666 + | 5114 5059
-------
seqinfo: 4640 sequences from an unspecified genome; no seqlengths
structure(bg)$intron
GRanges object with 62 ranges and 2 metadata columns:
seqnames ranges strand | id transcripts
<Rle> <IRanges> <Rle> | <integer> <character>
[1] SPAC144.02_T0 143-190 + | 1 258
[2] SPAC1486.05_T0 2883-2947 + | 2 282
[3] SPAC1639.01c_T0 962-1078 + | 3 360
[4] SPAC17H9.14c_T0 556-623 + | 4 557
[5] SPAC1851.04c_T0 2590-2650 - | 5 595
... ... ... ... . ... ...
[58] SPBC577.11_T0 213-258 + | 58 3789
[59] SPBP35G2.04c_T0 165-248 + | 59 4078
[60] SPCC1235.01_T0 946-985 + | 60 4221
[61] SPCC417.04_T0 151-212 - | 61 4707
[62] SPCC663.01c_T0 2156-2230 + | 62 4890
-------
seqinfo: 36 sequences from an unspecified genome; no seqlengths
structure(bg)$trans
GRangesList object of length 5059:
$1
GRanges object with 1 range and 2 metadata columns:
seqnames ranges strand | id transcripts
<Rle> <IRanges> <Rle> | <integer> <character>
[1] SPAC1002.01_T0 3-480 + | 1 1
$2
GRanges object with 1 range and 2 metadata columns:
seqnames ranges strand | id transcripts
[1] SPAC1002.02_T0 1-690 + | 2 2
$3
GRanges object with 1 range and 2 metadata columns:
seqnames ranges strand | id transcripts
[1] SPAC1002.03c_T0 1-2772 + | 3 3
...
<5056 more elements>
-------
seqinfo: 4640 sequences from an unspecified genome; no seqlengths
transcript_fpkm = texpr(bg, "FPKM")
head(transcript_fpkm)
FPKM.Sp_ds FPKM.Sp_hs FPKM.Sp_log FPKM.Sp_plat
1 26.83887 10.66771 27.71478 31.47178
2 167.16380 59.38688 78.52016 110.63217
3 141.24464 74.06425 90.76392 118.90112
4 43.37009 40.40438 28.87185 47.15420
5 29.76646 18.64130 13.50993 56.32640
6 167.24104 388.51425 180.40793 66.16932
transcript_cov = texpr(bg, "cov")
head(transcript_cov)
cov.Sp_ds cov.Sp_hs cov.Sp_log cov.Sp_plat
1 2.506276 0.987448 2.822176 2.700837
2 15.610145 5.497101 7.995652 9.494203
3 13.189754 6.855700 9.242424 10.203824
4 4.050000 3.740000 2.940000 4.046667
5 2.779661 1.725518 1.375706 4.833804
6 15.617357 35.962524 18.370810 5.678501
whole_tx_table = texpr(bg, "all")
head(whole_tx_table)
exon_mcov = eexpr(bg, "mcov")
head(exon_mcov)
mcov.Sp_ds mcov.Sp_hs mcov.Sp_log mcov.Sp_plat
1 2.5000 0.9874 2.8201 2.7008
2 15.6029 5.6942 7.9899 9.4928
3 13.1898 6.8557 9.2424 10.2035
4 4.0483 3.7400 2.9400 4.0450
5 2.7792 1.7255 1.3757 4.8338
6 15.6154 35.9428 18.3708 5.6765
junction_recount = iexpr(bg)
head(junction_recount)
rcount.Sp_ds rcount.Sp_hs rcount.Sp_log rcount.Sp_plat
1 7 4 7 5
2 1 0 0 0
3 1 1 8 5
4 6 0 0 0
5 0 0 0 5
6 74 20 17 46
whole_intron_table = iexpr(bg, "all")
head(whole_intron_table)
gene_expression = gexpr(bg)
head(gene_expression)
FPKM.Sp_ds FPKM.Sp_hs FPKM.Sp_log FPKM.Sp_plat
MSTRG.1 26.83887 10.667706 27.71478 31.471781
MSTRG.10 299.36441 286.959656 1263.90503 227.950562
MSTRG.100 38.39506 5.345963 21.96203 45.426273
MSTRG.1000 17.89546 65.530449 51.28185 5.700558
MSTRG.1001 199.42723 132.961121 95.39095 223.718674
MSTRG.1002 82.37018 79.360672 125.50716 67.473282
transcript_fpkm_log2 = log2(transcript_fpkm + 1)
par(mar = c(10, 4, 4, 2))
boxplot(transcript_fpkm_log2, col = as.numeric(pData(bg)$group), las=2, ylab="log2(FPKM + 1)")
cols = colorpanel(99, "blue", "black", "yellow")
heatmap.2(transcript_fpkm_log2, density.info = "none", trace = "none", margins = c(8, 5), cexCol = 1.2, keysize = 1,
col = cols, scale = "none")
Error in plot.new() : figure margins too large
pca = (prcomp(t(transcript_fpkm_log2)))
autoplot(pca, data=pData(bg), colour = "group", label = T, label.label = "id" )
names(indexes(bg))
[1] "e2t" "i2t" "t2g" "bamfiles" "pData"
head(indexes(bg)$e2t)
head(indexes(bg)$i2t)
head(indexes(bg)$t2g)
rr exon_transcript_table = indexes(bg)\(e2t transcript_gene_table = indexes(bg)\)t2g head(transcript_gene_table)
results_transcripts = stattest(bg, feature = "transcript", meas = "FPKM", covariate = "group", getFC = TRUE)
head(results_transcripts)
write.table(results_transcripts, file = "ballgown_transcript_results.txt", sep = "\t", quote = F, row.names = F)
rr results_genes = stattest(bg, feature = , meas = , covariate = , getFC = TRUE) head(results_genes) r write.table(results_genes, file = _gene_results.txt, sep = \t, quote = F, row.names = F)
results_genes <- arrange(results_genes, pval)
head(results_genes)
results_transcripts <- data.frame(geneIDs=ballgown::geneIDs(bg), results_transcripts)
results_transcripts <- arrange(results_transcripts, pval)
head(results_transcripts)
plotTranscripts(gene = "MSTRG.2282", gown = bg, samples = sampleNames(bg), meas = "FPKM", colorby = "transcript",
main = "transcripts from gene MSTRG.2282")
idx <- which(geneIDs(bg)=="MSTRG.2282")
idx
2313
2313
plot(transcript_fpkm_log2[idx, ] ~ pData(bg)$group, border=c(1, 2), main=paste(ballgown::geneIDs(bg)[idx]),
pch=19, xlab="Group", ylab="log2(FPKM+1)")
points(transcript_fpkm_log2[idx,] ~ jitter(as.numeric(pData(bg)$group)), col=as.numeric(pData(bg)$group), pch=20)