Processing math: 100%

The following appeared as a project assignment (using Open Science Framework) in the coursera course Improving your Statistical Inference. The project is available here: https://osf.io/wmp5a/.

Theoretical hypothesis

The theoretical hypothesis we are going to test is the following: both Satyajit Ray (from Kolkata, India) and Akira Kurosawa (from Japan) are great directors, both of them won the Academy Award for their Lifetime Achievement. Because they are both great, the movies they directed are equally good.

Dependent Variables to be measured

Movie Rating (Out of 10) #Users Rated Release Year
The Stranger (Agantuk) 8.1 1760 1991
Shakha Prosakha 7.6 453 1990
Ganashatru 7.3 662 1989
Ghare Baire 7.7 812 1984
Hirak Rajar Deshe 8.8 1387 1980
Jai Baba Felunath 7.9 1086 1979
Shatranj Ke Khilari 7.8 2370 1977
Jana Aranya 8.3 887 1976
Sonar Kella 8.5 1308 1974
Distant Thunder (Ashani Sanket) 8.2 908 1973
Company Limited (Seemabaddha) 8.0 782 1971
Pratidwandi 8.2 1051 1970
Days and Nights in the Forest (Aranyer Din Ratri) 8.3 1720 1970
Goopy Gayen Bagha Bayen 8.8 1495 1969
Chiriyakhana 7.2 477 1967
Nayak 8.3 1974 1966
Mahapurush 7.3 719 1965
The Coward (Kapurush) 7.8 858 1965
Charulata 8.3 3597 1964
Mahanagar 8.3 2275 1963
Abhijaan 8.0 781 1962
Kanchenjungha 8.0 706 1962
Teen Kanya 8.2 991 1961
Devi 8.0 1407 1960
The World of Apu (Apur Sansar) 8.2 8058 1959
The Music Room (Jalshaghar) 8.1 3872 1958
Paras-Pathar 7.8 723 1958
Aparajito 8.2 7880 1956
Pather Panchali 8.4 15799 1955
Movie Rating (Out of 10) #Users Rated Release Year
Maadadayo 7.4 4035 1993
Rhapsody in August 7.3 5131 1991
Dreams 7.8 19373 1990
Ran 8.2 84277 1985
Kagemusha 8.0 25284 1980
Dersu Uzala 8.3 18898 1975
Dodes’ka-den 7.5 4839 1970
Red Beard 8.3 12295 1965
High and Low 8.4 19989 1963
Sanjuro 8.2 22296 1962
Yojimbo 8.3 80906 1961
The Bad Sleep Well 8.1 8082 1960
The Hidden Fortress 8.1 25980 1958
The Lower Depths 7.5 3776 1957
Throne of Blood 8.1 34723 1957
I Live in Fear 7.4 3090 1955
Seven Samurai 8.7 247406 1954
Ikiru 8.3 46692 1952
The Idiot 7.4 3533 1951
Rashomon 8.3 112668 1950
Scandal 7.4 2580 1950
Stray Dog 7.9 11789 1949
The Quiet Duel 7.5 2131 1949
Drunken Angel 7.8 7422 1948
One Wonderful Sunday 7.3 1988 1947
Waga seishun ni kuinashi 7.2 2158 1946
Asu o tsukuru hitobito 6.6 119 1946
The Men Who Tread on the Tiger’s Tail 6.8 2567 1945
Zoku Sugata Sanshirô 6.2 1419 1945
Zoku Sugata Sanshirô 5.8 1229 1944

Justify the sample size

Results

library(pwr)
p.t.two <- pwr.t.test(d=0.8,power=0.84,sig.level=0.05,type="two.sample",alternative="two.sided")
print(p.t.two)
## 
##      Two-sample t test power calculation 
## 
##               n = 28.27101
##               d = 0.8
##       sig.level = 0.05
##           power = 0.84
##     alternative = two.sided
## 
## NOTE: n is number in *each* group
plot(p.t.two)

plot(p.t.two, xlab="sample size per group")

pwr.t.test(n=29,power=0.84,sig.level=0.05,type="two.sample",alternative="two.sided")
## 
##      Two-sample t test power calculation 
## 
##               n = 29
##               d = 0.78948
##       sig.level = 0.05
##           power = 0.84
##     alternative = two.sided
## 
## NOTE: n is number in *each* group

Specify the statistical test to conduct

# NHST two-sided t-test 
# 1. with t.test
t.test(dfs[,2], dfa[,2], alternative='two.sided', conf.level=0.9)
## 
##  Welch Two Sample t-test
## 
## data:  dfs[, 2] and dfa[, 2]
## t = 2.6853, df = 46.646, p-value = 0.01
## alternative hypothesis: true difference in means is not equal to 0
## 90 percent confidence interval:
##  0.1444567 0.6258881
## sample estimates:
## mean of x mean of y 
##  8.055172  7.670000
# 2. with formula
n1 <- length(dfs[,2])
n2 <- length(dfa[,2])
mu1 <- mean(dfs[,2])
mu2 <- mean(dfa[,2])
sd1 <- sd(dfs[,2]) 
sd2 <- sd(dfa[,2]) 
df <- n1 + n2 - 2
s.pooled <- sqrt(((n1-1)*sd1^2 + (n2-1)*sd2^2)/df)
t.stat <- (mu1 - mu2 - 0) / (s.pooled * sqrt(1/n1+1/n2))
p.value <- pt(t.stat, df, lower.tail=FALSE)
print(paste0('t.stat=', t.stat, ' df=', df, ' p.value=', p.value, 
            ' CI=(', (mu1-mu2)-qt(0.95, df)*s.pooled*sqrt(1/n1+1/n2), ',',  (mu1-mu2)+qt(0.95,df)*s.pooled*sqrt(1/n1+1/n2), ')'))
## [1] "t.stat=2.66216514176719 df=57 p.value=0.00503579567833758 CI=(0.143256766714653,0.627088060871554)"
d <- abs(mu1 - mu2) / s.pooled # choen's d_s
print(paste('Obeserved effect size (Cohen\'s d_s) = ', d))
## [1] "Obeserved effect size (Cohen's d_s) =  0.693268347374739"
#TOST Equivalence Test  
#low equivalence bound (Cohen's d)  -0.8
#high equivalence bound (Cohen's d) 0.8

delta.L <- -0.8
delta.U <- 0.8
#One-Sided Test 1       
t.L <- (mu1 - mu2 - delta.L) / (s.pooled * sqrt(1/n1+1/n2))
p.L <- pt(t.L, df, lower.tail=FALSE)
#One-Sided Test 2   
t.U <- (mu1 - mu2 - delta.U) / (s.pooled * sqrt(1/n1+1/n2))
p.U <- pt(t.U, df, lower.tail=FALSE)

t.stat <- ifelse(abs(t.L) < abs(t.U), t.L, t.U)
p.value <- max(p.L, p.U)
print(paste0('t.L=', t.L, ' df=', df, ' p.L=', p.L))
## [1] "t.L=8.19146069136423 df=57 p.L=1.6537791186325e-11"
print(paste0('t.U=', t.U, ' df=', df, ' p.U=', p.U))
## [1] "t.U=-2.86713040782984 df=57 p.U=0.997102267946586"
print(paste0('t.stat=', t.stat, ' df=', df, ' p.value=', p.value))
## [1] "t.stat=-2.86713040782984 df=57 p.value=0.997102267946586"
library(compute.es)
tes(t=(mu1 - mu2 - 0) / (s.pooled * sqrt(1/n1+1/n2)), n.1=n1, n.2=n2, level=90)
## Mean Differences ES: 
##  
##  d [ 90 %CI] = 0.69 [ 0.24 , 1.14 ] 
##   var(d) = 0.07 
##   p-value(d) = 0.01 
##   U3(d) = 75.59 % 
##   CLES(d) = 68.8 % 
##   Cliff's Delta = 0.38 
##  
##  g [ 90 %CI] = 0.68 [ 0.24 , 1.13 ] 
##   var(g) = 0.07 
##   p-value(g) = 0.01 
##   U3(g) = 75.3 % 
##   CLES(g) = 68.57 % 
##  
##  Correlation ES: 
##  
##  r [ 90 %CI] = 0.33 [ 0.12 , 0.51 ] 
##   var(r) = 0.01 
##   p-value(r) = 0.01 
##  
##  z [ 90 %CI] = 0.35 [ 0.12 , 0.57 ] 
##   var(z) = 0.02 
##   p-value(z) = 0.01 
##  
##  Odds Ratio ES: 
##  
##  OR [ 90 %CI] = 3.52 [ 1.56 , 7.93 ] 
##   p-value(OR) = 0.01 
##  
##  Log OR [ 90 %CI] = 1.26 [ 0.44 , 2.07 ] 
##   var(lOR) = 0.24 
##   p-value(Log OR) = 0.01 
##  
##  Other: 
##  
##  NNT = 4.15 
##  Total N = 59
#This code is originally written by Jeff Rouder: http://jeffrouder.blogspot.nl/2016/01/what-priors-should-i-use-part-i.html
#This code is for a one-sided t-test, testing a difference against 0.

N <- 29 # sample size
dz <- 0.693268347374739 # Cohen's dz effect size observed in the dependent t-test or one-sided t-test

dz_prior <- 0.2 #Enter effect size dz for the prior 
sd_prior <- 0.2 #Enter sd of the effect sizes of the prior - the higher, the wider the prior is

lo <- -Inf #lower bound of support (e.g., set to 0 if effects < 0 is not possible)
hi <- Inf #upper bound of support

#specify prior
altDens=function(delta) 
  dnorm(delta,dz_prior,sd_prior)*as.integer(delta>lo)*as.integer(delta<hi)
#Normalize alternative density in case user does not, 
K=1/integrate(altDens,lower=lo,upper=hi)$value
f=function(delta) K*altDens(delta)

delta=seq(-3,3,.01)
#Plot Alternative as a density and Null as a point arrow
#png(file=paste('prior.png'),width=6000,height=4000, res = 1000)
maxAlt=max(f(delta))
plot(delta,f(delta),typ='n',xlab="Effect Size Parameter Delta",ylab="Density",ylim=c(0,1.4*maxAlt),main="Models")
arrows(0,0,0,1.3*maxAlt,col='darkblue',lwd=2)
lines(delta,f(delta),col='darkgreen',lwd=2)
legend("topright",legend=c("Null","Alternative"),col=c('darkblue','darkgreen'),lwd=2)

#dev.off()

#Prediction Function Under Null
nullPredF=function(obs,N) dt(sqrt(N)*obs,N-1)

#Prediction Function Under the Alternative
altPredIntegrand=function(delta,obs,N) 
  dt(sqrt(N)*obs,N-1,sqrt(N)*delta)*f(delta)
altPredF=function(obs,N) 
  integrate(altPredIntegrand,lower=lo,upper=hi,obs=obs,N=N)$value

obs=delta
I=length(obs)
nullPred=nullPredF(obs,N)
altPred=1:I
for (i in 1:I) altPred[i]=altPredF(obs[i],N)

#Evaluate Predicted Density at Observed Value dz
valNull=nullPredF(dz,N)
valAlt=altPredF(dz,N)

#Plot The Predictions
#png(file=paste('posterior.png'),width=6000,height=4000, res = 1000)
top=max(altPred,nullPred)
plot(type='l',obs,nullPred,ylim=c(0,top),xlab="Observed Effect Size",ylab="Density",main=paste("Bayes factor (alt/null) is ",round(valAlt/valNull,digits =3)),col='darkblue',lwd=2)
lines(obs,altPred,col='darkgreen',lwd=2)
legend("topright",legend=c("Null","Alternative"),col=c('darkblue','darkgreen'),lwd=2)
abline(v=dz,lty=2,lwd=2,col='red')
points(pch=19,c(dz,dz),c(valNull,valAlt))

#dev.off()

cat("Bayes factor (alt/null) is ",valAlt/valNull,", the t-value is ",sqrt(N)*dz," and the p-value is",2*(1-pt(abs(sqrt(N)*dz),N-1)))
## Bayes factor (alt/null) is  50.17844 , the t-value is  3.733364  and the p-value is 0.0008548068
#© Daniel Lakens, 2016. 
# This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. https://creativecommons.org/licenses/by-nc-sa/4.0/