Lab 6 Lab Manual Exercises

6.4.1 Loading the data

library(data.table)
all_data <- fread("https://raw.githubusercontent.com/CrumpLab/statisticsLab/master/data/MehrSongSpelke2016.csv")
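
If the data.table package is not installed, the same file can be read with base R. This is just an optional alternative (it assumes an internet connection and returns a plain data frame rather than a data.table):

# base-R alternative to fread(), reading the same URL
all_data <- read.csv("https://raw.githubusercontent.com/CrumpLab/statisticsLab/master/data/MehrSongSpelke2016.csv")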

6.4.2 Inspect the data frame

library(summarytools)
view(dfSummary(all_data))
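
If summarytools is not available, a few base R functions give a quick, if less polished, overview of the same data frame (an optional alternative, not part of the original exercise):

dim(all_data)   # number of rows and columns
str(all_data)   # column names and types
head(all_data)  # first few rows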

6.4.3 Get data for experiment one (baseline)

library(dplyr)
experiment_one <- all_data %>% filter(exp1==1)
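
As a quick sanity check, the t-tests below report df = 31, so the filtered data frame should contain 32 infants:

# confirm the number of rows (infants) in experiment one
nrow(experiment_one)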

6.4.4 Prep for one sample t-test

baseline <- experiment_one$Baseline_Proportion_Gaze_to_Singer

plot(baseline)

hist(baseline)

Look at descriptives

mean(baseline)
## [1] 0.5210967
sd(baseline)
## [1] 0.1769651
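
The standard error of the mean is not printed above, but it is easy to compute from the same descriptives (for these data it works out to roughly 0.031):

# standard error of the mean for the baseline proportions
sd(baseline)/sqrt(length(baseline))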

Conduct t-test

t.test(baseline, mu=.5)
## 
##  One Sample t-test
## 
## data:  baseline
## t = 0.67438, df = 31, p-value = 0.5051
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
##  0.4572940 0.5848994
## sample estimates:
## mean of x 
## 0.5210967

6.4.5 Get data for the test phase (experiment one)

test_phase <- experiment_one$Test_Proportion_Gaze_to_Singer

plot(test_phase)

hist(test_phase)

Look at descriptives

mean(test_phase)
## [1] 0.5934913
sd(test_phase)
## [1] 0.1786884

Conduct t-test

t.test(test_phase, mu = .5)
## 
##  One Sample t-test
## 
## data:  test_phase
## t = 2.9597, df = 31, p-value = 0.005856
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
##  0.5290672 0.6579153
## sample estimates:
## mean of x 
## 0.5934913

6.4.6 Paired-samples t-test

t.test(test_phase, baseline, paired=TRUE)
## 
##  Paired t-test
## 
## data:  test_phase and baseline
## t = 2.4164, df = 31, p-value = 0.02175
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.01129217 0.13349698
## sample estimates:
## mean of the differences 
##              0.07239458

Relationship between the one-sample and paired-samples t-test

t.test(test_phase, baseline, paired=TRUE)
## 
##  Paired t-test
## 
## data:  test_phase and baseline
## t = 2.4164, df = 31, p-value = 0.02175
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.01129217 0.13349698
## sample estimates:
## mean of the differences 
##              0.07239458
difference_scores<-test_phase-baseline
t.test(difference_scores, mu=0)
## 
##  One Sample t-test
## 
## data:  difference_scores
## t = 2.4164, df = 31, p-value = 0.02175
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.01129217 0.13349698
## sample estimates:
##  mean of x 
## 0.07239458
difference_scores <- test_phase-baseline
length(difference_scores[difference_scores>0])
## [1] 22
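
Expressed as a proportion of the 32 infants, the same count shows the fraction who looked longer at the singer during the test phase than at baseline (22/32, or about 0.69):

# proportion of infants with a positive difference score
length(difference_scores[difference_scores > 0])/length(difference_scores)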

6.4.7 Make the dataframe for plotting

Phase <- rep(c("Baseline","Test"), each = 32)
Proportions <- c(baseline,test_phase)
plot_df <- data.frame(Phase,Proportions)

Plot data

library(ggplot2)

mean_df <- aggregate(Proportions ~ Phase, plot_df, mean)

difference_scores <- baseline-test_phase #calculate difference scores
standard_error <- sd(difference_scores)/sqrt(length(difference_scores)) #calculate SEM

ggplot(plot_df, aes(x=Phase, y=Proportions))+ 
  geom_bar(data=mean_df, stat="identity",aes(fill=Phase))+
  geom_errorbar(data=mean_df, aes(ymin=Proportions-standard_error, 
                                  ymax=Proportions+standard_error), width=.1) +
  geom_point(alpha=.25)

difference_scores <- test_phase-baseline #calculate difference scores
standard_error <- sd(difference_scores)/sqrt(length(difference_scores)) #calculate SEM
mean_difference <- mean(difference_scores)

qplot(x="MeanDifference", y=mean_difference)+
  geom_bar(stat="identity", width=.5, alpha=.5)+
  geom_hline(yintercept=0)+
  geom_point(aes(y=difference_scores), alpha=.25)+
  geom_errorbar(aes(ymin=mean_difference-standard_error, 
                                  ymax=mean_difference+standard_error), width=.1)
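
Note that qplot() is deprecated in newer versions of ggplot2. A sketch of an equivalent plot built with ggplot() is shown below; the data frame names (summary_df, points_df) are just illustrative choices, not part of the original lab code:

# ggplot() equivalent of the qplot() call above
summary_df <- data.frame(MeanDifference = "MeanDifference",
                         mean_difference = mean_difference)
points_df  <- data.frame(MeanDifference = "MeanDifference",
                         difference_scores = difference_scores)

ggplot(summary_df, aes(x = MeanDifference, y = mean_difference)) +
  geom_bar(stat = "identity", width = .5, alpha = .5) +
  geom_hline(yintercept = 0) +
  geom_point(data = points_df, aes(y = difference_scores), alpha = .25) +
  geom_errorbar(aes(ymin = mean_difference - standard_error,
                    ymax = mean_difference + standard_error), width = .1)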

t_test_results <- t.test(difference_scores)
lower_interval<- t_test_results$conf.int[1]
upper_interval<- t_test_results$conf.int[2]

qplot(x="MeanDifference", y=mean_difference)+
  geom_bar(stat="identity", width=.5, alpha=.5)+
  geom_hline(yintercept=0)+
  geom_point(aes(y=difference_scores), alpha=.25)+
  geom_errorbar(aes(ymin=lower_interval, 
                                  ymax=upper_interval), width=.1)

6.4.8 Data simulation

sample_sd   <- (sd(baseline)+sd(test_phase))/2

simulated_means <- numeric(1000) # preallocate a vector for the simulated means
for(i in 1:1000){
 simulated_means[i] <- mean(rnorm(32,.5, sample_sd))
}

hist(simulated_means)
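
The for loop above works fine; an equivalent, slightly more idiomatic way to run the same simulation is replicate(). This is shown only as an optional alternative:

# vectorized version of the simulation above
simulated_means <- replicate(1000, mean(rnorm(32, .5, sample_sd)))
hist(simulated_means)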

sample_sd   <- sd(baseline-test_phase)

simulated_mean_difference <- numeric(1000) # preallocate a vector for the simulated mean differences
for(i in 1:1000){
 simulated_mean_difference[i] <- mean(rnorm(32,0, sample_sd))
}

hist(simulated_mean_difference)
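
One way to connect this simulated null distribution back to the paired t-test is to ask how often the simulation produces a mean difference at least as extreme as the observed one (0.0724 from section 6.4.6). This is a rough simulation-based analogue of the p-value, added here as an optional extension rather than part of the original exercise:

# proportion of simulated mean differences as extreme as the observed difference
observed_difference <- mean(test_phase - baseline)
mean(abs(simulated_mean_difference) >= abs(observed_difference))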

Lab 6 Generalization Exercises

Condition_A <- rnorm(20,100,25)
Condition_B <- rnorm(20,100,25)

Conduct a paired-samples t-test on the sample data stored in the variables Condition_A and Condition_B. Report the results of the t-test and the mean difference between conditions.

t.test(Condition_A, Condition_B, paired=TRUE)
## 
##  Paired t-test
## 
## data:  Condition_A and Condition_B
## t = -0.133, df = 19, p-value = 0.8956
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -19.25654  16.95541
## sample estimates:
## mean of the differences 
##               -1.150564

You could also compute the mean difference with the code below.

diff_scores<-Condition_A-Condition_B
t.test(diff_scores, mu=0)
## 
##  One Sample t-test
## 
## data:  diff_scores
## t = -0.133, df = 19, p-value = 0.8956
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -19.25654  16.95541
## sample estimates:
## mean of x 
## -1.150564
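
The mean difference itself can also be reported directly; either line below should match the -1.150564 listed above under "mean of x":

# mean difference between conditions, computed directly
mean(diff_scores)
mean(Condition_A) - mean(Condition_B)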

The above code assumes no difference in the means between Condition A and Condition B: both samples come from the same normal distribution. Change the mean for one of the distributions, making the change large enough that you find a significant p-value (p < 0.05) when you conduct the t-test on the new simulated data. Report the t-test and the means for each condition.

Condition_A <- rnorm(20,100,25)
Condition_BB <- rnorm(20,130,25)
mean(Condition_A)
## [1] 91.59698
mean(Condition_BB)
## [1] 125.3685
t.test(Condition_A, Condition_BB, paired=TRUE)
## 
##  Paired t-test
## 
## data:  Condition_A and Condition_BB
## t = -4.3815, df = 19, p-value = 0.000321
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -49.90419 -17.63890
## sample estimates:
## mean of the differences 
##               -33.77155
diff_scores<-Condition_A-Condition_BB
t.test(diff_scores, mu=0)
## 
##  One Sample t-test
## 
## data:  diff_scores
## t = -4.3815, df = 19, p-value = 0.000321
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -49.90419 -17.63890
## sample estimates:
## mean of x 
## -33.77155

Lab 6 Written Answer Questions

  1. A t value gives a measure of evidence against the null hypothesis: the larger the absolute t value, the stronger the evidence that the null hypothesis should be rejected. The t value is a property of your data. Rather than relying on the sample mean and standard error separately, the t value combines them: in the one-sample formula, it is the difference between the sample mean and the population mean, divided by the standard error of the mean. In this way, t values go beyond means and standard errors by expressing how far the sample mean is from the hypothesized population mean in units of standard error (see the first code sketch after these answers).

  2. Using an alpha criterion of 0.05, any p value greater than 0.05 is not statistically significant. A p value of 0.25 tells us that, if the null hypothesis were true, the null distribution would produce a t value as extreme as the one obtained (in either direction, for a two-tailed test) 25% of the time (see the second code sketch after these answers).

  3. The critical t value is associated with the alpha criterion. In a two-tailed test using an alpha of 0.05, a critical t value of 2.6 means that t values more extreme than ±2.6 occur only 5% of the time under the null hypothesis. In other words, t values of +2.6 or greater occur 2.5% of the time, and t values of -2.6 or smaller occur 2.5% of the time (see the third code sketch after these answers).

  4. A one-sample t test is used to estimate the likelihood that a sample came from a population with a particular mean; the population mean is specified by the researcher, and the standard error of the mean is estimated from the sample. A paired-samples t test is very similar to the one-sample t test: the difference between each pair of observations is computed first, and then a one-sample t test is conducted on those difference scores against a population mean of zero.
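
The following optional code sketches illustrate the answers to questions 1-3, using the baseline test from section 6.4.4 (t = 0.67438, df = 31, p = 0.5051) as a running example; none of this is required by the written answer questions.

# Question 1: compute the one-sample t value by hand from its formula
# t = (sample mean - population mean) / standard error of the mean
(mean(baseline) - 0.5)/(sd(baseline)/sqrt(length(baseline)))  # should reproduce t = 0.67438

# Question 2: recover the two-tailed p value from a t value and its degrees of freedom
2*pt(-abs(0.67438), df = 31)  # should be approximately 0.5051

# Question 3: find the critical t value for a two-tailed test with alpha = .05
qt(1 - .05/2, df = 31)  # just over 2 for df = 31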