#library calling
library("dplyr")
library("DESeq2")
library("ggplot2")
#Importing the count data
setwd("C:/Users/user/Desktop/MY Thesis journey(PBT)/RNA seq")
count_data<-read.csv('count_matrix_all_TF.csv',row.names = 1,header=TRUE)
count_data<-count_data[which(rowSums(count_data)>50),]
head(count_data)
#replace with non_zero value for creating deseq object which doesn't except non-Zero value
count_data<-count_data+1
head(count_data)
#importing metadata and filtering for leaf tissue
metadata<-read.csv('metadata_processed.csv',row.names =1,header=TRUE)
#%>%filter(tissue=="leaf")
head(metadata)
#Make DESeq2 object from counts and metadata
ddsMat <- DESeqDataSetFromMatrix(countData = count_data,
colData = metadata,
design = ~treatment)
converting counts to integer mode
ddsMat
class: DESeqDataSet
dim: 1582 478
metadata(1): version
assays(1): counts
rownames(1582): Os01g0102400 Os01g0108400 ... Os12g0636200.2 Os12g0636200.3
rowData names(0):
colnames(478): SRR7723492 SRR7723493 ... SRR7723969 SRR7723970
colData names(6): Run Isolation_source ... RNAseqTime_Point PHENOTYPE
# Convert all samples to vsd(data normalization)
vsd <- varianceStabilizingTransformation(ddsMat, blind = FALSE)
-- note: fitType='parametric', but the dispersion trend was not well captured by the
function: y = a/x + b, and a local regression fit was automatically substituted.
specify fitType='local' or 'mean' to avoid this message next time.
#PCA PLOT
# Plot PCA by column variable
plotPCA(vsd, intgroup = "treatment", ntop = 5) +
theme(aspect.ratio=1)

# remove default ggplot2 theme
geom_point(size = 1) +
# Increase point size
scale_y_continuous(limits = c(-15, 100)) +
# change limits to fix figure dimensions
ggtitle(label = "Principal Component Analysis (PCA)",
subtitle = "Top 500 most variable genes")
Error in `+.gg`:
! Cannot add ggproto objects together. Did you forget to add this object to a ggplot object?
Backtrace:
1. ggplot2:::`+.gg`(...)
# Find differential expressed genes
ddsMat <- DESeq(ddsMat)
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
-- note: fitType='parametric', but the dispersion trend was not well captured by the
function: y = a/x + b, and a local regression fit was automatically substituted.
specify fitType='local' or 'mean' to avoid this message next time.
final dispersion estimates
fitting model and testing
-- replacing outliers and refitting for 7 genes
-- DESeq argument 'minReplicatesForReplace' = 7
-- original counts are preserved in counts(dds)
estimating dispersions
fitting model and testing
ddsMat
class: DESeqDataSet
dim: 1582 478
metadata(1): version
assays(6): counts mu ... replaceCounts replaceCooks
rownames(1582): Os01g0102400 Os01g0108400 ... Os12g0636200.2 Os12g0636200.3
rowData names(23): baseMean baseVar ... maxCooks replace
colnames(478): SRR7723492 SRR7723493 ... SRR7723969 SRR7723970
colData names(8): Run Isolation_source ... sizeFactor replaceable
# Get results from testing with FDR adjust pvalues
results <- results(ddsMat, pAdjustMethod = "fdr", alpha = 0.05)
results
log2 fold change (MLE): treatment stress vs control
Wald test p-value: treatment stress vs control
DataFrame with 1582 rows and 6 columns
baseMean log2FoldChange lfcSE stat pvalue padj
<numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
Os01g0102400 1.43934 0.1643464 0.160625 1.02317 0.30622921 NA
Os01g0108400 1.49675 0.0782894 0.159634 0.49043 0.62382956 NA
Os01g0108600 3.83023 -0.3451047 0.112826 -3.05872 0.00222282 0.0105697
Os01g0111500 1.61228 0.2442828 0.151661 1.61072 0.10724060 0.2285402
Os01g0128000 1.53146 0.3674500 0.148828 2.46895 0.01355099 NA
... ... ... ... ... ... ...
Os12g0632600 1.49242 -0.0178436 0.154581 -0.115432 0.908103 NA
Os12g0636200 18.94205 0.0220489 0.129714 0.169980 0.865026 0.930429
Os12g0636200.1 18.94205 0.0220489 0.129714 0.169980 0.865026 0.930429
Os12g0636200.2 18.94205 0.0220489 0.129714 0.169980 0.865026 0.930429
Os12g0636200.3 18.94205 0.0220489 0.129714 0.169980 0.865026 0.930429
# Generate summary of testing.
summary(results)
out of 1582 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up) : 144, 9.1%
LFC < 0 (down) : 275, 17%
outliers [1] : 0, 0%
low counts [2] : 184, 12%
(mean count < 2)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
resFilt <- results[which(results$padj < 0.05 & abs(results$log2FoldChange) < 1), ]
summary(resFilt)
out of 392 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up) : 130, 33%
LFC < 0 (down) : 262, 67%
outliers [1] : 0, 0%
low counts [2] : 0, 0%
(mean count < 2)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
plotMA(results)

#Volcano plot
res1 = as.data.frame(results)
res1
# add a new column using the mutate function in dplyr
res1 = mutate(res1, sig=ifelse(res1$padj<0.05, "FDR<0.05", "Not Sig"))
res1[which(abs(res1$log2FoldChange)<0.5),'sig'] <- "Not Sig"
res1
p = ggplot(res1, aes(log2FoldChange, -log10(padj))) +
geom_point(aes(col=sig)) +
scale_color_manual(values=c("red", "black"))
p
Warning: Removed 184 rows containing missing values (geom_point).

#Gene Annotations
#Plot counts of top gene
topGene <- rownames(res1)[1]
plotCounts(ddsMat, gene = topGene, intgroup= "treatment")


LS0tDQp0aXRsZTogIlJOQV9TRVFfVHJhbnNjcmlwdGlvbkZhY3Rvcl9zZWVkbGluZyINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCg0KDQpgYGB7cn0NCiNsaWJyYXJ5IGNhbGxpbmcNCmxpYnJhcnkoImRwbHlyIikNCmxpYnJhcnkoIkRFU2VxMiIpDQpsaWJyYXJ5KCJnZ3Bsb3QyIikNCmBgYA0KYGBge3J9DQojSW1wb3J0aW5nIHRoZSBjb3VudCBkYXRhDQpzZXR3ZCgiQzovVXNlcnMvdXNlci9EZXNrdG9wL01ZIFRoZXNpcyBqb3VybmV5KFBCVCkvUk5BIHNlcSIpDQpjb3VudF9kYXRhPC1yZWFkLmNzdignY291bnRfbWF0cml4X2FsbF9URi5jc3YnLHJvdy5uYW1lcyA9IDEsaGVhZGVyPVRSVUUpDQpjb3VudF9kYXRhPC1jb3VudF9kYXRhW3doaWNoKHJvd1N1bXMoY291bnRfZGF0YSk+NTApLF0NCmhlYWQoY291bnRfZGF0YSkNCmBgYA0KDQoNCmBgYHtyfQ0KI3JlcGxhY2Ugd2l0aCBub25femVybyB2YWx1ZSBmb3IgY3JlYXRpbmcgZGVzZXEgb2JqZWN0IHdoaWNoIGRvZXNuJ3QgZXhjZXB0IG5vbi1aZXJvIHZhbHVlDQpjb3VudF9kYXRhPC1jb3VudF9kYXRhKzENCmhlYWQoY291bnRfZGF0YSkNCmBgYA0KDQoNCmBgYHtyfQ0KI2ltcG9ydGluZyBtZXRhZGF0YSBhbmQgZmlsdGVyaW5nIGZvciBsZWFmIHRpc3N1ZQ0KbWV0YWRhdGE8LXJlYWQuY3N2KCdtZXRhZGF0YV9wcm9jZXNzZWQuY3N2Jyxyb3cubmFtZXMgPTEsaGVhZGVyPVRSVUUpDQojJT4lZmlsdGVyKHRpc3N1ZT09ImxlYWYiKQ0KaGVhZChtZXRhZGF0YSkNCmBgYA0KDQoNCmBgYHtyLG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0UsIGVycm9yPUZBTFNFfQ0KDQojTWFrZSBERVNlcTIgb2JqZWN0IGZyb20gY291bnRzIGFuZCBtZXRhZGF0YQ0KZGRzTWF0IDwtIERFU2VxRGF0YVNldEZyb21NYXRyaXgoY291bnREYXRhID0gY291bnRfZGF0YSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGNvbERhdGEgPSBtZXRhZGF0YSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGRlc2lnbiA9IH50cmVhdG1lbnQpDQpkZHNNYXQNCg0KYGBgDQoNCmBgYHtyfQ0KIyBDb252ZXJ0IGFsbCBzYW1wbGVzIHRvIHZzZChkYXRhIG5vcm1hbGl6YXRpb24pDQp2c2QgPC0gdmFyaWFuY2VTdGFiaWxpemluZ1RyYW5zZm9ybWF0aW9uKGRkc01hdCwgYmxpbmQgPSBGQUxTRSkNCmBgYA0KYGBge3J9DQoNCiNQQ0EgUExPVA0KIyBQbG90IFBDQSBieSBjb2x1bW4gdmFyaWFibGUNCnBsb3RQQ0EodnNkLCBpbnRncm91cCA9ICJ0cmVhdG1lbnQiLCBudG9wID0gNSkgKw0KICB0aGVtZShhc3BlY3QucmF0aW89MSkgDQogICMgcmVtb3ZlIGRlZmF1bHQgZ2dwbG90MiB0aGVtZQ0KICBnZW9tX3BvaW50KHNpemUgPSAxKSArIA0KICAjIEluY3JlYXNlIHBvaW50IHNpemUNCiAgc2NhbGVfeV9jb250aW51b3VzKGxpbWl0cyA9IGMoLTE1LCAxMDApKSArIA0KICAjIGNoYW5nZSBsaW1pdHMgdG8gZml4IGZpZ3VyZSBkaW1lbnNpb25zDQogIGdndGl0bGUobGFiZWwgPSAiUHJpbmNpcGFsIENvbXBvbmVudCBBbmFseXNpcyAoUENBKSIsIA0KICAgICAgICAgIHN1YnRpdGxlID0gIlRvcCA1MDAgbW9zdCB2YXJpYWJsZSBnZW5lcyIpDQpgYGANCg0KDQpgYGB7cn0NCmBgYA0KDQoNCmBgYHtyfQ0KIyBGaW5kIGRpZmZlcmVudGlhbCBleHByZXNzZWQgZ2VuZXMNCg0KZGRzTWF0IDwtIERFU2VxKGRkc01hdCkNCmRkc01hdA0KYGBgDQoNCg0KYGBge3J9DQojIEdldCByZXN1bHRzIGZyb20gdGVzdGluZyB3aXRoIEZEUiBhZGp1c3QgcHZhbHVlcyANCnJlc3VsdHMgPC0gcmVzdWx0cyhkZHNNYXQsIHBBZGp1c3RNZXRob2QgPSAiZmRyIiwgYWxwaGEgPSAwLjA1KQ0KcmVzdWx0cw0KYGBgDQpgYGB7cn0NCiMgR2VuZXJhdGUgc3VtbWFyeSBvZiB0ZXN0aW5nLiAgDQpzdW1tYXJ5KHJlc3VsdHMpDQpgYGANCmBgYHtyfQ0KcmVzRmlsdCA8LSByZXN1bHRzW3doaWNoKHJlc3VsdHMkcGFkaiA8IDAuMDUgJiBhYnMocmVzdWx0cyRsb2cyRm9sZENoYW5nZSkgPCAxKSwgXQ0Kc3VtbWFyeShyZXNGaWx0KQ0Kd3JpdGUuY3N2KHJlc0ZpbHQsImxlYWZfVEZfcmVzRmlsdC5jc3YiKQ0KYGBgDQoNCg0KDQpgYGB7cn0NCnBsb3RNQShyZXN1bHRzKQ0KYGBgDQpgYGB7cn0NCiNWb2xjYW5vIHBsb3QNCnJlczEgPSBhcy5kYXRhLmZyYW1lKHJlc3VsdHMpDQpyZXMxDQojIGFkZCBhIG5ldyBjb2x1bW4gdXNpbmcgdGhlIG11dGF0ZSBmdW5jdGlvbiBpbiBkcGx5cg0KcmVzMSA9IG11dGF0ZShyZXMxLCBzaWc9aWZlbHNlKHJlczEkcGFkajwwLjA1LCAiRkRSPDAuMDUiLCAiTm90IFNpZyIpKQ0KcmVzMVt3aGljaChhYnMocmVzMSRsb2cyRm9sZENoYW5nZSk8MC41KSwnc2lnJ10gPC0gIk5vdCBTaWciDQpyZXMxDQoNCg0KcCA9IGdncGxvdChyZXMxLCBhZXMobG9nMkZvbGRDaGFuZ2UsIC1sb2cxMChwYWRqKSkpICsNCiAgZ2VvbV9wb2ludChhZXMoY29sPXNpZykpICsNCiAgc2NhbGVfY29sb3JfbWFudWFsKHZhbHVlcz1jKCJyZWQiLCAiYmxhY2siKSkNCnANCmBgYA0KDQpgYGB7cn0NCiNHZW5lIEFubm90YXRpb25zDQojUGxvdCBjb3VudHMgb2YgdG9wIGdlbmUNCg0KdG9wR2VuZSA8LSByb3duYW1lcyhyZXMxKVsxXQ0KcGxvdENvdW50cyhkZHNNYXQsIGdlbmUgPSB0b3BHZW5lLCBpbnRncm91cD0gInRyZWF0bWVudCIpDQpgYGANCg0KDQoNCmBgYHtyfQ0KI3Bsb3RDb3VudHMNCiBwYXIobWZyb3c9YygyLDMpKQ0KIA0KIHBsb3RDb3VudHMoZGRzTWF0LCBnZW5lPSJPczAxZzAxMDg2MDAiLCBpbnRncm91cD0idHJlYXRtZW50IikNCiBwbG90Q291bnRzKGRkc01hdCwgZ2VuZT0iT3MwNWcwMTk1MjAwIiwgaW50Z3JvdXA9InRyZWF0bWVudCIpDQpgYGANCg0K