#Reference:
#http://www.gettinggeneticsdone.com/2014/05/r-volcano-plots-to-visualize-rnaseq-microarray.html
#Read-in /mnt/bd2/Turles/BrainGOonly_edit.csv
BrainGOdata <- read.csv("/mnt/bd2/Turles/BrainGOonly_edit.csv", header=TRUE, sep=",")
#Glance at data
head(BrainGOdata)
## SeqName Description PPEE PPDE PostFC
## 1 c14505_g1_i1 Myb-related B 0 1 0.76230333
## 2 c1819_g1_i1 importin subunit alpha-3 isoform X1 0 1 0.89173484
## 3 c18256_g1_i2 Transcription factor COE1 0 1 0.13070670
## 4 c1869_g1_i1 14-3-3 zeta-like isoform X1 0 1 0.89780314
## 5 c27686_g1_i1 PREDICTED: ovostatin-like, partial 0 1 0.01490221
## 6 c28548_g1_i1 four and a half LIM domains 1 0 1 0.66376355
## RealFC FC...... ABS.FC. eValue FDR pValue sim.mean
## 1 0.762119513 -1.311814 1.311814 0.00000e+00 0 0 98.00
## 2 0.891711420 -1.121410 1.121410 0.00000e+00 0 0 100.00
## 3 0.112813204 -7.650717 7.650717 1.01809e-28 0 0 83.33
## 4 0.897813424 -1.113830 1.113830 1.67911e-165 0 0 99.00
## 5 0.000313928 -67.104152 67.104152 0.00000e+00 0 0 97.00
## 6 0.663559680 -1.506561 1.506561 0.00000e+00 0 0 98.33
## log2FoldChange X.GO GO.IDs GO.Names
## 1 0.3915629175 4 F:GO:0003677 F:GO:0003700
## 2 0.1653133164 8 C:GO:0070062 P:GO:0006607
## 3 2.9355949561 6 C:GO:0005634 F:GO:0003677
## 4 0.1555289523 1 F:GO:0019904 F:protein domain specific binding
## 5 6.0683301237 3 C:GO:0005615 F:GO:0004866
## 6 0.5912586803 1 F:GO:0008270 F:zinc ion binding
## tre1
## 1 P:GO:0006355
## 2 C:GO:0005829
## 3 F:GO:0046872
## 4
## 5 P:GO:0010951
## 6
#Try plotting w/ eVal
with(BrainGOdata, plot(log2FoldChange, -log10(eValue), main="Volcano plot"))

#Produces 50 warnings:
#1: In min(x) : no non-missing arguments to min; returning Inf
#2: In max(x) : no non-missing arguments to max; returning -Inf
###Investigate what may cause error, e.g. "NA" and zeros
#See if there are NAs in eVal column
sum(is.na(BrainGOdata$eValue))
## [1] 1110
#See if there are zeros in eVal column
length(grep("0", BrainGOdata$eValue))
## [1] 1599
###Subset the data: remove NAs and zeros in eVal column
#Remove NAs
BrainGOdata <- BrainGOdata[!is.na(BrainGOdata$eValue),]
#Remove zeros
BrainGOdata <- BrainGOdata[BrainGOdata$eValue!="0", ]
#Re-check to make sure NAs and zeros were cleared out
sum(is.na(BrainGOdata$eValue))
## [1] 0
length(grep("0", BrainGOdata$eValue))
## [1] 579
#For some reason, the above command says there are 579 zeros,
#but min() returns 1.72138e-180
min(BrainGOdata$eValue)
## [1] 1.72138e-180
#Try plotting again
with(BrainGOdata, plot(log2FoldChange, -log10(eValue), main="Volcano plot", xlab = "log2FoldChange", ylab = "-log10(eValue)"))

###Let's try this w/ the p-val instead of e-val
###Note: no cut-offs are implemented here
#Reset data
BrainGOdata <- read.csv("/mnt/bd2/Turles/BrainGOonly_edit.csv", header=TRUE, sep=",")
#Remove NAs
BrainGOdata <- BrainGOdata[!is.na(BrainGOdata$pValue),]
#Remove zeros
BrainGOdata <- BrainGOdata[BrainGOdata$pValue!="0", ]
#Try plotting
with(BrainGOdata, plot(log2FoldChange, -log10(na.omit(as.numeric(pValue))), main="Volcano plot", xlab = "log2FoldChange", ylab = "-log10(eValue)"))

###Try using ggplot
#install.packages("ggplot2") #only run this command once and if you haven't already installed
library(ggplot2)
#First, find FDR cut-off
BrainGOdata$threshold = as.factor(abs(as.numeric(BrainGOdata$log2FoldChange)) > 2 & as.numeric(BrainGOdata$FDR) < 0.05)
#Produces something, but it's not right
g = ggplot(data=BrainGOdata, aes(x=as.numeric(log2FoldChange), y=-log10(as.numeric(pValue)), colour=threshold)) +
geom_point(alpha=0.4, size=1.75) +
#opts(legend.position = "none") +
geom_point() +
xlab("log2 fold change") + ylab("-log10 p-value")
g

#Here's where I'm leaving off on 7/10/17
#Still trying to deal the the factor problem
#Convert to numeric
mylog2FoldChange <- as.numeric(as.character(BrainGOdata$log2FoldChange))
mypvalue <- -log10(as.numeric(as.character(BrainGOdata$pValue)))
myFDR <- as.numeric(as.character(BrainGOdata$FDR))
#First, find FDR cut-off
BrainGOdata$threshold = as.factor(abs(mylog2FoldChange) > 2 & myFDR < 0.05)
g = ggplot(data=BrainGOdata, aes(x=mylog2FoldChange, y=mypvalue, colour=threshold)) +
geom_point(alpha=0.4, size=1.75) +
theme(legend.position = "none") +
ylim(c(-0.30, -2.4))
xlim(c(-10, 10))
## <ScaleContinuousPosition>
## Range:
## Limits: -10 -- 10
xlab("log2 fold change") + ylab("-log10 p-value")
## Error in xlab("log2 fold change") + ylab("-log10 p-value"): non-numeric argument to binary operator
g

#levels(factor(BrainGOdata$pValue))