Problem 5

Problem 1. Understanding the power of a test.

knitr::opts_chunk$set(echo = TRUE)
library(BayesFactor)

## Warning: package 'BayesFactor' was built under R version 4.0.2

## Loading required package: coda

## Warning: package 'coda' was built under R version 4.0.2

## Loading required package: Matrix

## ************
## Welcome to BayesFactor 0.9.12-4.2. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
## 
## Type BFManual() to open the manual.
## ************

set.seed(100)
##this function generates n data points extracts the p/value 
##bayes factor of the one-sample two-sided test for each:
simdata1 <- function(n,mean=.1)
{
  data <- rnorm(n,mean=mean)
  
  c( t.test(data)$p.value,
    binom.test(sum(data>0),n)$p.value,
    exp(ttestBF(data)@bayesFactor$bf)) 
  ##exponentiate because bf is stored as a log and so 0 is unbiased.
  
}

runs <- 500
##this simulates runs experiments where the Null hypothesis is true:
null <- data.frame(pval=rep(NA,runs),
                           pvalnp=rep(NA,runs),
                           bayesf=rep(NA,runs))

##this simulates 1000 experiments where the alternative hypothesis is true:
simulation <- data.frame(pval=rep(NA,runs),
                           pvalnp=rep(NA,runs),
                           bayesf=rep(NA,runs))
n<-380   ##this is how many samples are drawn in each experiment
for(i in 1:runs)
{
  simulation[i,] <- simdata1(n)
  null[i,] <- simdata1(n,0)
}

## This determines how many of the cases produce a significant result.
##
# par(mfrow=c(2,3))
# hist(simulation$pval,col="navy",xlab="Standard t-test p value",main="t-test (Null is true)")
# hist(simulation$pvalnp,col="navy",xlab="Binomial test p value",main="Binomial test (Null is true)")
# hist(log(simulation$bayesf),col="navy", xlab="log(Bayes factor)", main= "Bayes factor test (Null is true)")
# hist(null$pval,col="navy",xlab="Standard t-test p value",main="t-test (Alternative is true)")
# hist(null$pvalnp,col="navy",xlab="Binomial test p value", main="Binomial test (Alternative is true)")
# hist(log(null$bayesf),col="navy", xlab="log(Bayes factor)", main="Bayes factor test (Alternative is true)")

# mean(null$pval <.05)
# mean(null$pvalnp < .05)
# mean(null$bayesf   >2 )      #evidence for the null
# mean(simulation$pval <.05)  #it's simply sum(simulation$pval <.05) / length(simulation$pval) 31/500 = 0.062
# mean(simulation$pvalnp < .05)
# mean(simulation$bayesf > 2)
#k = simulation[,1][simulation$pval <.05] this only gives us the vector containing the values

Complete this for Problem 1

library(rmarkdown)
library(knitr)

## Warning: package 'knitr' was built under R version 4.0.2

#first_row_n_10 = c(mean(simulation$pval <.05),mean(simulation$pvalnp < .05),mean(simulation$bayesf > 2),mean(null$pval <.05),mean(null$pvalnp < .05),mean(null$bayesf >2 ) )

first_row_n_10 = c(0.058, 0.020, 0.044, 0.048, 0.016, 0.036)

#second_row_n_100 = c(mean(simulation$pval <.05),mean(simulation$pvalnp < .05),mean(simulation$bayesf > 2),mean(null$pval <.05),mean(null$pvalnp < .05),mean(null$bayesf >2 ) )

second_row_n_100 = c(0.210, 0.114, 0.100, 0.048, 0.044, 0.016)

#third_row_n_2000 = c(mean(simulation$pval <.05),mean(simulation$pvalnp < .05),mean(simulation$bayesf > 2),mean(null$pval <.05),mean(null$pvalnp < .05),mean(null$bayesf >2 ) )

third_row_n_2000 = c(0.996, 0.940, 0.932, 0.038, 0.040, 0.008)

#forth_row_n_25000 = c(mean(simulation$pval <.05),mean(simulation$pvalnp < .05),mean(simulation$bayesf > 2),mean(null$pval <.05),mean(null$pvalnp < .05),mean(null$bayesf >2 ) )

forth_row_n_25000 = c(1.000, 1.000, 1.000, 0.070, 0.062, 0.000)


mat = matrix(c(first_row_n_10,second_row_n_100,third_row_n_2000,forth_row_n_25000),ncol = 6,byrow = T)


colnames(mat) <- c("Simulation_pval","Simulation_pvalnp","Simulation_bayesf",'Null_pval',
                     'Null_pvalnp','Null_bayesf')
rownames(mat) <- c("n_10","n_100","n_2000",'n_25000')
our_table <- as.table(mat) 

knitr::kable(our_table)

	Simulation_pval	Simulation_pvalnp	Simulation_bayesf	Null_pval	Null_pvalnp	Null_bayesf
n_10	0.058	0.020	0.044	0.048	0.016	0.036
n_100	0.210	0.114	0.100	0.048	0.044	0.016
n_2000	0.996	0.940	0.932	0.038	0.040	0.008
n_25000	1.000	1.000	1.000	0.070	0.062	0.000

Power increases with the sample number and it looks like it reaches its peak after a certain number threshold. It looks like a sample size of ca. 820 would give us a 80% statistical power. Interestingly, the normal t-test seems to be the most sensitive test to the sample size while the binomial and the bayesian factor seem to follow similar trends. In the t-test the false alarm rate seems to float around 0.04 (4%) and 0.07 (7%) as if sample size did not impact much the likelihood to incur into a false alarm. On the other hand, the bayesian factor was positively impacted by changes in sample size. Finally, “strangely” a bigger sample size for the binomial t-test meant a higher probability to incur into a false alarm starting from as low as 1.6% for n=10 and 6.2% for n=25000.

Problem 2: t-tests

data <- c(101,103,99,92,110,105,103,102,104,106,101,
          101,101,102,103,101,99,104,105,102,102,103,
          345,103,107,101,103,108,104,101,101,102)

# one_sample_ttest_2tails = function(x,true_mean)
# {
#   sd <- sd(x)
#   n <- length(x)
#   nominator = mean(x)
#   denominator = sd/sqrt(n)
#   t = (nominator-true_mean)/denominator              #the pvalue formula is not correct
#   pvalue = 2 * pnorm(abs(t), lower.tail = FALSE)
#   print(sprintf('t = %s --- pvalue = %s ',round(t,3),round(pvalue,6)))
# }

t_value = function(x,true_mean)
{
  sd <- sd(x)
  n <- length(x)
  nominator = mean(x)
  denominator = sd/sqrt(n)
  t = (nominator-true_mean)/denominator
  print(sprintf('t = %s',round(t,4)))
}

t_value(data,100)

## [1] "t = 1.3329"

t.test(data,mu=100,alternative="greater")

## 
##  One Sample t-test
## 
## data:  data
## t = 1.3329, df = 31, p-value = 0.09614
## alternative hypothesis: true mean is greater than 100
## 95 percent confidence interval:
##  97.24563      Inf
## sample estimates:
## mean of x 
##   110.125

data2 = data[data<344]

t_value(data2,100)

## [1] "t = 4.5349"

t.test(data2,mu=100,alternative="greater")

## 
##  One Sample t-test
## 
## data:  data2
## t = 4.5349, df = 30, p-value = 4.316e-05
## alternative hypothesis: true mean is greater than 100
## 95 percent confidence interval:
##  101.5946      Inf
## sample estimates:
## mean of x 
##  102.5484

binom.test(sum(data>100),length(data),p=.5,alternative = "greater")

## 
##  Exact binomial test
## 
## data:  sum(data > 100) and length(data)
## number of successes = 29, number of trials = 32, p-value = 1.278e-06
## alternative hypothesis: true probability of success is greater than 0.5
## 95 percent confidence interval:
##  0.7751839 1.0000000
## sample estimates:
## probability of success 
##                0.90625

binom.test(sum(data2>100),length(data2),p=.5,alternative = "greater")

## 
##  Exact binomial test
## 
## data:  sum(data2 > 100) and length(data2)
## number of successes = 28, number of trials = 31, p-value = 2.325e-06
## alternative hypothesis: true probability of success is greater than 0.5
## 95 percent confidence interval:
##  0.768497 1.000000
## sample estimates:
## probability of success 
##              0.9032258

The t test for our original data is not significantly different from the true mean because our sd is very large. When we remove the outlier value we now get a very small sd and bigger t-value which means more statistical evidence. The binomial test does not take into consideration the mean nor the sd but only the amount of cases that are greater (one-tail) than the true-mean (>100) and it appears that the vast majority of values are higher than 100. Henceforth, it detects a very high probability of success (alternative hypothesis is true) with and without the outlier.

Problem 3. Comparing t-test, wilcox, and Bayes factor t-test of two groups

week2 <- ChickWeight[ChickWeight$Time==14,]
diet1 <- week2[week2$Diet==1,][1:10,] #just get the first 10 chicks
diet2 <- week2[week2$Diet==2,]
diet3 <- week2[week2$Diet==3,]
diet4 <- week2[week2$Diet==4,]

library(BayesFactor)
library('car')

## Warning: package 'car' was built under R version 4.0.2

## Loading required package: carData

par(mfrow=c(2,2))
qqPlot(diet1$weight,main = 'Visual inspection of normality assumption diet1')

## [1] 7 9

qqPlot(diet2$weight,main = 'diet2')

## [1] 1 4

qqPlot(diet3$weight,main = 'diet3')

## [1] 5 7

qqPlot(diet4$weight,main = 'diet4')

## [1] 3 4

ttest_pvalues = c(t.test(diet1$weight,diet2$weight,alternative = 'two.sided')$p.value,
t.test(diet1$weight,diet3$weight,alternative = 'two.sided')$p.value,
t.test(diet1$weight,diet4$weight,alternative = 'two.sided')$p.value,
t.test(diet2$weight,diet3$weight,alternative = 'two.sided')$p.value,
t.test(diet2$weight,diet4$weight,alternative = 'two.sided')$p.value,
t.test(diet3$weight,diet4$weight,alternative = 'two.sided')$p.value)

single_combinations = 3
bonferronicorrection = 0.05/single_combinations #correct by number of possible combinations of a diet with respect to the other ones

significant_ttest = c('diet12','diet13','diet14','diet23','diet24','diet34')[match(ttest_pvalues[ttest_pvalues <= bonferronicorrection],ttest_pvalues)]
                                                                                  
significant_ttest

## [1] "diet14"

wilcox.test(diet2$weight,diet1$weight,paired=T)$p.value

## Warning in wilcox.test.default(diet2$weight, diet1$weight, paired = T): cannot
## compute exact p-value with zeroes

## [1] 0.722283

wilcox.test(diet3$weight,diet1$weight,paired=T)$p.value

## [1] 0.03710938

wilcox.test(diet4$weight,diet1$weight,paired=T)$p.value

## [1] 0.01953125

wilcox.test(diet3$weight,diet2$weight,paired=T)$p.value

## [1] 0.2753906

wilcox.test(diet4$weight,diet2$weight,paired=T)$p.value

## Warning in wilcox.test.default(diet4$weight, diet2$weight, paired = T): cannot
## compute exact p-value with ties

## [1] 0.1848583

wilcox.test(diet4$weight,diet3$weight,paired=T)$p.value

## [1] 0.8457031

ttestBF(diet1$weight,diet2$weight)

## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 0.4805565 ±0%
## 
## Against denominator:
##   Null, mu1-mu2 = 0 
## ---
## Bayes factor type: BFindepSample, JZS

ttestBF(diet1$weight,diet3$weight)

## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 2.836878 ±0%
## 
## Against denominator:
##   Null, mu1-mu2 = 0 
## ---
## Bayes factor type: BFindepSample, JZS

ttestBF(diet1$weight,diet4$weight)

## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 8.371614 ±0%
## 
## Against denominator:
##   Null, mu1-mu2 = 0 
## ---
## Bayes factor type: BFindepSample, JZS

ttestBF(diet2$weight,diet3$weight)

## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 0.7020066 ±0%
## 
## Against denominator:
##   Null, mu1-mu2 = 0 
## ---
## Bayes factor type: BFindepSample, JZS

ttestBF(diet2$weight,diet4$weight)

## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 0.7475864 ±0%
## 
## Against denominator:
##   Null, mu1-mu2 = 0 
## ---
## Bayes factor type: BFindepSample, JZS

ttestBF(diet3$weight,diet4$weight)

## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 0.4045955 ±0%
## 
## Against denominator:
##   Null, mu1-mu2 = 0 
## ---
## Bayes factor type: BFindepSample, JZS

For the standard t-test the only statistical significant difference in weight can be found between diet 1 and diet 4 while, when using the Wilcoxon, no significant p values are reported (0.0195 (diet4-diet1) is bigger than 0.05/3 = 0.0166 accounting for the Bonferroni correction). As reported by Peter Westfall (1997) from a bayesian point of view there shouldn’t be any need to adjust the posterior probability of the event provided one’s prior is well calibrated. Choosing a bayesian approach differences it seems like now two significant differences between diet1 and diet3 at bf = 2.83 and between diet1 and diet4 at bf = 8.37 can be found. However bf = 2.83 is not a very strong evidence value to support the alternative hypothesis as much as bf = 8.37 I guess that the most conservative approach would simply be using a parametric t-test where the non-parametric t-test as the Wilcoxon would not detect significant differences and thus making us incur into a type 2 error. Lastly, the bayesian approach also looks reasonable.

Problem 4. Comparison via paired tests.

days <- ChickWeight[ChickWeight$Diet==1&
                      (ChickWeight$Time==10|ChickWeight$Time==12),]
byDay <- as.data.frame(tapply(days$weight,
                              list(Chick=factor(days$Chick),
                                    Day=paste("D",days$Time,sep="")),I))



days2 = ChickWeight[ChickWeight$Diet==1&
                      (ChickWeight$Time==10|ChickWeight$Time==14),]

byDay2 <- as.data.frame(tapply(days2$weight,
                              list(Chick=factor(days2$Chick),
                                    Day=paste("D",days2$Time,sep="")),I))



byDay2 = na.omit(byDay2)

qqPlot(byDay2$D10,main = 'Visual inspection of normality assumption day 10')

## [1] 11 17

qqPlot(byDay2$D14,main = 'Visual inspection of normality assumption day 14')

## [1] 17  1

t.test(byDay2$D10,byDay2$D14,paired=T)

## 
##  Paired t-test
## 
## data:  byDay2$D10 and byDay2$D14
## t = -5.7917, df = 17, p-value = 2.168e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -38.19994 -17.80006
## sample estimates:
## mean of the differences 
##                     -28

t.test(byDay2$D10,byDay2$D14,paired=F)

## 
##  Welch Two Sample t-test
## 
## data:  byDay2$D10 and byDay2$D14
## t = -2.7801, df = 26.524, p-value = 0.009865
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -48.682169  -7.317831
## sample estimates:
## mean of x mean of y 
##  95.38889 123.38889

wilcox.test(byDay2$D14,byDay2$D10,paired=T)

## Warning in wilcox.test.default(byDay2$D14, byDay2$D10, paired = T): cannot
## compute exact p-value with zeroes

## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  byDay2$D14 and byDay2$D10
## V = 151, p-value = 0.00046
## alternative hypothesis: true location shift is not equal to 0

wilcox.test(byDay2$D14,byDay2$D10,paired=F)

## Warning in wilcox.test.default(byDay2$D14, byDay2$D10, paired = F): cannot
## compute exact p-value with ties

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  byDay2$D14 and byDay2$D10
## W = 234.5, p-value = 0.02267
## alternative hypothesis: true location shift is not equal to 0

diff <- byDay2$D14-byDay2$D10
binom.test(sum(diff>0),length(diff))

## 
##  Exact binomial test
## 
## data:  sum(diff > 0) and length(diff)
## number of successes = 16, number of trials = 18, p-value = 0.001312
## alternative hypothesis: true probability of success is not equal to 0.5
## 95 percent confidence interval:
##  0.6528796 0.9862488
## sample estimates:
## probability of success 
##              0.8888889

ttestBF(byDay2$D10,byDay2$D14,paired = TRUE)

## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 1102.961 ±0%
## 
## Against denominator:
##   Null, mu = 0 
## ---
## Bayes factor type: BFoneSample, JZS

ttestBF(byDay2$D10,byDay2$D14,paired = FALSE)

## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 5.533314 ±0%
## 
## Against denominator:
##   Null, mu1-mu2 = 0 
## ---
## Bayes factor type: BFindepSample, JZS

The right one to choose is the paired t test because it takes into account the fact that the two variables are dependent and this means higher likelihood that the change did happen, hence bringing more statistical evidence/significance.This is particularly clear when running a bayesisan factor analysis where the bf = 1102 if diet1 and diet4 are related and only bf = 5.53 is they are not (which is still supporting the alternative hypothesis but not to the same extent).

Problem 5. Robustness to transforms

diet1$tp <- 1-1/(diet1$weight-80)
diet3$tp <- 1-1/(diet3$weight-80)

t.test(diet1$weight,diet3$weight,alternative = 'two.sided')

## 
##  Welch Two Sample t-test
## 
## data:  diet1$weight and diet3$weight
## t = -2.4768, df = 17.168, p-value = 0.02395
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -64.051217  -5.148783
## sample estimates:
## mean of x mean of y 
##     129.9     164.5

t.test(diet1$tp,diet3$tp,alternative = 'two.sided')

## 
##  Welch Two Sample t-test
## 
## data:  diet1$tp and diet3$tp
## t = -2.0526, df = 10.463, p-value = 0.06599
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.033930763  0.001290364
## sample estimates:
## mean of x mean of y 
## 0.9696721 0.9859923

ttestBF(diet1$weight,diet3$weight)

## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 2.836878 ±0%
## 
## Against denominator:
##   Null, mu1-mu2 = 0 
## ---
## Bayes factor type: BFindepSample, JZS

ttestBF(diet1$tp,diet3$tp)

## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 1.59892 ±0%
## 
## Against denominator:
##   Null, mu1-mu2 = 0 
## ---
## Bayes factor type: BFindepSample, JZS

wilcox.test(diet3$weight,diet1$weight,paired=T)

## 
##  Wilcoxon signed rank exact test
## 
## data:  diet3$weight and diet1$weight
## V = 48, p-value = 0.03711
## alternative hypothesis: true location shift is not equal to 0

wilcox.test(diet3$tp,diet1$tp,paired=T)

## 
##  Wilcoxon signed rank exact test
## 
## data:  diet3$tp and diet1$tp
## V = 48, p-value = 0.03711
## alternative hypothesis: true location shift is not equal to 0

Changes when trasforming data occur for the bayesian factor analysis and the parametric t-test.In the parametric t-test the toothpaste change is not significant while the weight is. Similarly in the bayesian factor analysis the toothpaste change is somewhat ambigous for the toothpaste and closer to rejecting the null hypothesis for the weight changed. However, for the Wilcoxon rank test transforming, the two different varibales do not show different results because it does not account for their magnitude and/or distribution.

Problem 6: Correlations

x <- runif(20)
y <- runif(20)

z <- x + .5*y
z1 <- z
z2 <- z+10
z3 <- log(z)
z4 <- z*10
z5 <- z + 20*runif(20) + 5*runif(20)

cor.test(x,z1,method="pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  x and z1
## t = 8.7343, df = 18, p-value = 6.865e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7591561 0.9599241
## sample estimates:
##       cor 
## 0.8994975

cor.test(x,z2,method="pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  x and z2
## t = 8.7343, df = 18, p-value = 6.865e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7591561 0.9599241
## sample estimates:
##       cor 
## 0.8994975

cor.test(x,z3,method="pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  x and z3
## t = 6.6531, df = 18, p-value = 3.043e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6390818 0.9363207
## sample estimates:
##      cor 
## 0.843153

cor.test(x,z4,method="pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  x and z4
## t = 8.7343, df = 18, p-value = 6.865e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7591561 0.9599241
## sample estimates:
##       cor 
## 0.8994975

cor.test(x,z5,method="pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  x and z5
## t = 1.0405, df = 18, p-value = 0.3119
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2284083  0.6158010
## sample estimates:
##       cor 
## 0.2381874

plot(x,z5)

cor.test(x,z1,method="spearman")

## 
##  Spearman's rank correlation rho
## 
## data:  x and z1
## S = 162, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.8781955

cor.test(x,z2,method="spearman")

## 
##  Spearman's rank correlation rho
## 
## data:  x and z2
## S = 162, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.8781955

cor.test(x,z3,method="spearman")

## 
##  Spearman's rank correlation rho
## 
## data:  x and z3
## S = 162, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.8781955

cor.test(x,z4,method="spearman")

## 
##  Spearman's rank correlation rho
## 
## data:  x and z4
## S = 162, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.8781955

cor.test(x,z5,method="spearman")

## 
##  Spearman's rank correlation rho
## 
## data:  x and z5
## S = 1120, p-value = 0.5046
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.1578947

The Pearson’s product-moment correlation results are identical between x and z1,z2,z4 but differ between x and z3 because taking a logarithm is not a linear transformation and the Pearson coefficient only tells us about the amount of linear dependece between these 2 variables.In order for us to disregard our log transformation we can use the Spearman’s rank correlation rho that operates in ranks and does not care about linearity. Furthermore, the Spearman correlation pvalue and rho are smaller and greater respectively in comparison to the Pearson pvalue and cor value. Lastly, the correlation between x and z5 is almost inexistent for both tests because there is too much noise.