tt <- function(x,y,mu0=0,ts=TRUE) { # two-sided t-test
  nx <- length(x)
  ny <- length(y)
  sp <- sqrt(((nx-1)*var(x)+(ny-1)*var(y))/(nx+ny-2))
  t <- (mean(x)-mean(y)-mu0) / (sp*sqrt(1/nx+1/ny))
  df <- (var(x)/nx + var(y)/ny)^2 / 
    ((var(x)/nx)^2/(nx-1) + (var(y)/ny)^2/(ny-1))
  
  sample_sizes <- c(nx, ny)
  names(sample_sizes) <- c("x","y")
  sample_means <- c(mean(x), mean(y))
  names(sample_means) <- c("x", "y")
  pvalue <- ifelse(ts,2,1)*(1-pt(abs(t),df=df))
  cohensd <- (mean(x) - mean(y))/sp
  
  list(sample_sizes=sample_sizes, sample_means=sample_means,
       df=df, pvalue=pvalue, cohensd=cohensd)
}

tt_significant <- function(tt.df, alpha=0.05) {
  pvalues <- tt.df$pvalue 
  pvalues <- as.numeric(pvalues < alpha)
  names(pvalues) <- tt.df$variable
  pvalues
}

plot_alpha <- function(tt.df, from=0.01, to=1.0, by=0.01) {
  alpha <- seq(from, to, by)
  prop_significant <- sapply(alpha, function(x) mean(tt_significant(tt.df, x)))
  plot(x=alpha, y=prop_significant, ylab="Proportion significant", xlab="Alpha",
       type='l')
}

expr_data <- data.frame(matrix(
  c("1007_s_at", "11.376519", "11.826743", "11.123022", "11.743439", "12.172961", 
    "11.522009", "11.288407", "11.364544", "11.783231", "12.102697", "12.141934", 
    "12.141672", "11.541161", "12.069206", "11.529456", "9.692066", "11.242988", 
    "DDR1", "U48705", "1053_at", "7.270398", "7.534450", "7.169297", "7.730833", 
    "6.728914", "7.033900", "8.152550", "7.357942", "7.811469", "6.704366",  
    "7.723678", "7.607720", "6.904579", "6.837490", "7.437899", "7.608960", 
    "6.704648", "RFC2", "M87338", "117_at", "8.172823", "8.350568", "8.216073", 
    "8.052177", "7.940714", "8.122496", "8.246269", "7.597745", "8.809971", 
    "7.299070", "7.808597", "8.390707", "7.653514", "8.680945", "8.050873", 
    "9.242006", "8.253535", "HSPA6", "X51757", "121_at", "10.064195", 
    "11.193688", "9.846189", "10.549992", "10.172722", "10.357284", "11.361081", 
    "10.446139", "11.165541", "10.285435", "10.123556", "10.532735", 
    "10.379335", "10.487541", "10.542419", "10.248043", "10.207259", "PAX8", 
    "X69699", "1255_g_at", "6.256425", "6.830607", "5.825010", "6.098157", 
    "6.104971", "5.818458", "6.355995", "6.311312", "7.366574", "5.577412", 
    "4.570794", "5.046956", "6.561945", "5.897955", "5.402725", "5.957542", 
    "6.201037", "GUCA1A", "L36861", "1294_at", "9.347887", "9.540260", 
    "9.229501", "9.464348", "9.764903", "9.962180", "9.300450", "9.230649", 
    "9.783263","8.749285", "9.466965", "9.653450", "9.076623", "9.827835", 
    "9.096732", "9.441370", "9.102000", "UBA7", "L13852"), nrow=6, byrow=TRUE))

row.names(expr_data) <- expr_data[,1]
expr_data <- expr_data[,-1]
names(expr_data) <- c("GSM119944", "GSM119945", "GSM119946", "GSM119947", 
                      "GSM119948", "GSM119949", "GSM119950", "GSM119951", 
                      "GSM119952", "GSM119953", "GSM119954", "GSM119955", 
                      "GSM119956", "GSM119957", "GSM119958", "GSM119959", 
                      "GSM119960", "GeneSymbol", "PublicID")
expr_data[,1:17] <- sapply(expr_data[,1:17], function(x)
  as.numeric(as.character(x)))    

groups_data <- data.frame(
  PatientID=c('GSM119946','GSM119948','GSM119951','GSM119955','GSM119956',
              'GSM119959','GSM119947','GSM119950','GSM119952','GSM119953',
              'GSM119957','GSM119958','GSM119944','GSM119945','GSM119949',
              'GSM119954','GSM119960'),
  TreatmentGroup = c(rep('Control',6), rep('Treatment1',6), 
                     rep('Treatment2',5))
)

control_cols <- groups_data$PatientID[which(groups_data$TreatmentGroup=="Control")]
treatment_cols <- groups_data$PatientID[which(groups_data$TreatmentGroup=="Treatment1")]

# nrows <- dim(expr_data)[1]
# for(i in 1:nrows) {
#   control_group <- expr_data[i,control_cols]
#   treatment_group <- expr_data[i,treatment_cols]
#   
#   cat("T-test for control vs treatment (", row.names(treatment_group), ")\n")
#   result <- tt(as.numeric(control_group), as.numeric(treatment_group), 0, FALSE)
#   cat("  sample sizes:", as.numeric(result$sample_sizes),"\n")
#   cat("  sample means:", as.numeric(result$sample_means),"\n")
#   cat("  degrees of freedom:", as.numeric(result$df),"\n")
#   cat("  p-value:", as.numeric(result$pvalue),"\n\n")
# }

tt.df <- data.frame(Variable=character(0), SampleSize1=numeric(0), SampleSize2=numeric(0),
                    SampleMean1=numeric(0), SampleMean2=numeric(0), DegreesFreedom=numeric(0),
                    Pvalue=numeric(0), CohensD=numeric(0))
# store results in data.frame
nrows <-  dim(expr_data)[1]
for(i in 1:nrows) {
  control_group <- expr_data[i,control_cols]
  treatment_group <- expr_data[i,treatment_cols]
  result <- tt(as.numeric(control_group), as.numeric(treatment_group), 0, FALSE)
  
  tt.df <- rbind(tt.df, data.frame(variable=row.names(treatment_group),
                                   size1=result$sample_sizes[1],
                                   size2=result$sample_sizes[2],
                                   mean1=result$sample_means[1],
                                   mean2=result$sample_means[2],
                                   df=result$df,
                                   pvalue=result$pvalue,
                                   cohensd=result$cohensd))
}
row.names(tt.df) <- 1:nrow(tt.df)

print(tt.df)
##    variable size1 size2     mean1     mean2       df     pvalue
## 1 1007_s_at     6     6 11.339238 11.752739 6.165043 0.16600503
## 2   1053_at     6     6  7.229569  7.445768 8.484730 0.22795627
## 3    117_at     6     6  8.173460  8.189884 9.865556 0.48076769
## 4    121_at     6     6 10.270860 10.732001 7.980862 0.02555726
## 5 1255_g_at     6     6  5.967956  6.116470 9.223889 0.34351776
## 6   1294_at     6     6  9.399416  9.370319 8.588961 0.44419898
##       cohensd
## 1 -0.60773394
## 2 -0.45104152
## 3 -0.02855896
## 4 -1.32376621
## 5 -0.24008516
## 6  0.08345823
cat("\nSignificance at alpha=0.05:\n-----------------------------\n")
## 
## Significance at alpha=0.05:
## -----------------------------
print(tt_significant(tt.df))
## 1007_s_at   1053_at    117_at    121_at 1255_g_at   1294_at 
##         0         0         0         1         0         0
plot_alpha(tt.df)