First we load up the neccesary libraries:
library(fdrtool)
library(ggplot2)
library(grid)
library(gridExtra)
Load in the supplied data from the text file:
setwd("/Users/Nick/spring15/independentStudy")#have to set this by chunk of code
obesityRaw = read.table("data/obesity_censored.txt", header = TRUE, fill = TRUE)
obesity = na.omit(obesityRaw) #There are some NAs in the data.
Generate a simple histogram of the p values of the words:
ggplot(obesity, aes(x = pvalue)) +
geom_histogram(aes(y=..count../sum(..count..)), color = "black", fill = "#7fc97f") +
labs(title = "Histogram of p-values for obesity words (normalized)",
x = "P-Value", y = "% of words at p-val")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
#ggsave("figures/Obesity_PVal_Hist_Norm.pdf") #Save
Run the data through the FDR package.
FDRresults = fdrtool(obesity$pvalue, statistic = "pvalue", plot = TRUE)
## Step 1... determine cutoff point
## Step 2... estimate parameters of null distribution and eta0
## Step 3... compute p-values and estimate empirical PDF/CDF
## Step 4... compute q-values and local fdr
## Step 5... prepare for plotting
obesity$qvalue = FDRresults$qval
P = ggplot(obesity, aes(x = pvalue)) +
geom_histogram(aes(y=..count../sum(..count..)),fill = "#7fc97f", alpha = 0.6) + ylim(0,0.4)
Q = ggplot(obesity, aes(x = qvalue)) +
geom_histogram(aes(y=..count../sum(..count..)), fill = "#beaed4", alpha = 0.6) + ylim(0,0.4)
Plot = arrangeGrob(P,Q,ncol=2, main = "P-Val vs, Q-Val")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
Plot
#ggsave("figures/Obesity_PVal_vs_Qval.pdf",out) #Save