Loading the data
library(data.table)
# Read the MehrSongSpelke2016 CSV from the CrumpLab statisticsLab repository.
all_data <- fread(
  input = "https://raw.githubusercontent.com/CrumpLab/statisticsLab/master/data/MehrSongSpelke2016.csv"
)
Inspect the data frame
library(summarytools)
# Summarise every column of the full data set and display the result
# with summarytools' view().
view(x = dfSummary(x = all_data))
Get data for experiment one (baseline)
library(dplyr)
# Keep only the rows whose exp1 flag equals 1.
experiment_one <- filter(all_data, exp1 == 1)
Prep for one sample t-test
# Pull the baseline gaze proportions out of the experiment-one rows as a vector.
baseline <- experiment_one[["Baseline_Proportion_Gaze_to_Singer"]]
# Quick visual checks: values in row order, then their distribution.
plot(x = baseline)
hist(x = baseline)
Look at descriptives
# Mean and standard deviation of the baseline proportions.
mean(x = baseline)
## [1] 0.5210967
sd(x = baseline)
## [1] 0.1769651
Conduct t-test
# Two-sided one-sample t-test of the baseline mean against a hypothesized
# mean of 0.5.
t.test(x = baseline, mu = 0.5)
##
## One Sample t-test
##
## data: baseline
## t = 0.67438, df = 31, p-value = 0.5051
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
## 0.4572940 0.5848994
## sample estimates:
## mean of x
## 0.5210967
Get data for experiment one (test phase)
# Pull the test-phase gaze proportions out of the same experiment-one rows.
test_phase <- experiment_one[["Test_Proportion_Gaze_to_Singer"]]
# Quick visual checks: values in row order, then their distribution.
plot(x = test_phase)
hist(x = test_phase)
Look at descriptives
# Mean and standard deviation of the test-phase proportions.
mean(x = test_phase)
## [1] 0.5934913
sd(x = test_phase)
## [1] 0.1786884
Conduct t-test
# Two-sided one-sample t-test of the test-phase mean against a hypothesized
# mean of 0.5.
t.test(x = test_phase, mu = 0.5)
##
## One Sample t-test
##
## data: test_phase
## t = 2.9597, df = 31, p-value = 0.005856
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
## 0.5290672 0.6579153
## sample estimates:
## mean of x
## 0.5934913
Paired-samples t-test
# Paired t-test comparing test-phase and baseline proportions, pair by pair.
# `var.equal` is intentionally not passed: t.test() only uses it for the
# two-independent-sample case. With paired = TRUE the test is run on the
# pairwise differences, so the argument had no effect and only suggested an
# equal-variance assumption that is not being made.
t.test(test_phase, baseline, paired = TRUE)
##
## Paired t-test
##
## data: test_phase and baseline
## t = 2.4164, df = 31, p-value = 0.02175
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.01129217 0.13349698
## sample estimates:
## mean of the differences
## 0.07239458
Relationship between one-sample and paired sample t-test
# Paired t-test again, as the reference result for the comparison with the
# one-sample test on difference scores that follows.
# `var.equal` is dropped because t.test() ignores it when paired = TRUE
# (the test is computed on the pairwise differences).
t.test(test_phase, baseline, paired = TRUE)
##
## Paired t-test
##
## data: test_phase and baseline
## t = 2.4164, df = 31, p-value = 0.02175
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.01129217 0.13349698
## sample estimates:
## mean of the differences
## 0.07239458
# Element-wise difference between the two phases, then a one-sample t-test of
# those differences against 0. The echoed output matches the paired test.
difference_scores <- test_phase - baseline
t.test(x = difference_scores, mu = 0)
##
## One Sample t-test
##
## data: difference_scores
## t = 2.4164, df = 31, p-value = 0.02175
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.01129217 0.13349698
## sample estimates:
## mean of x
## 0.07239458
# Count how many paired differences are positive (test phase > baseline).
difference_scores <- test_phase - baseline
# sum() over the logical mask counts the TRUEs directly. The previous
# length(x[x > 0]) form kept NA entries in the subset and would have counted
# missing differences as positive; na.rm = TRUE excludes them.
sum(difference_scores > 0, na.rm = TRUE)
## [1] 22
Make the dataframe for plotting
# Long-format data frame for plotting: one row per observation, labelled with
# the phase it came from.
# The label counts are taken from the vectors themselves instead of a
# hard-coded 32, so the labels stay aligned with the values if the sample
# size changes.
Phase <- rep(
  c("Baseline", "Test"),
  times = c(length(baseline), length(test_phase))
)
Proportions <- c(baseline, test_phase)
plot_df <- data.frame(Phase, Proportions)
Plot data
library(ggplot2)
# Per-phase means, used as the bar heights.
mean_df <- aggregate(Proportions ~ Phase, data = plot_df, FUN = mean)
# Standard error of the mean of the paired differences; the same value is
# drawn around both bars. (The sign of the difference does not affect sd().)
difference_scores <- baseline - test_phase
standard_error <- sd(difference_scores) / sqrt(length(difference_scores))
# Bars = phase means, error bars = +/- 1 SEM, faint points = raw observations.
ggplot(plot_df, aes(x = Phase, y = Proportions)) +
  geom_col(data = mean_df, aes(fill = Phase)) +
  geom_errorbar(
    data = mean_df,
    aes(
      ymin = Proportions - standard_error,
      ymax = Proportions + standard_error
    ),
    width = .1
  ) +
  geom_point(alpha = .25)
# Mean of the paired differences with +/- 1 SEM error bars, drawn over the
# individual difference scores.
difference_scores <- test_phase - baseline
standard_error <- sd(difference_scores) / sqrt(length(difference_scores))
mean_difference <- mean(difference_scores)
# qplot() is deprecated (ggplot2 >= 3.4.0), so the plot is built with ggplot().
# The explicit geom_point() reproduces the point layer qplot() added by
# default for an x/y pair.
ggplot(mapping = aes(x = "MeanDifference", y = mean_difference)) +
  geom_point() +
  geom_col(width = .5, alpha = .5) +
  geom_hline(yintercept = 0) +  # reference line at "no difference"
  geom_point(aes(y = difference_scores), alpha = .25) +
  geom_errorbar(
    aes(
      ymin = mean_difference - standard_error,
      ymax = mean_difference + standard_error
    ),
    width = .1
  )
# Same mean-difference plot, but the error bar spans the confidence interval
# reported by the one-sample t-test on the difference scores.
t_test_results <- t.test(difference_scores)
lower_interval <- t_test_results$conf.int[1]
upper_interval <- t_test_results$conf.int[2]
# qplot() is deprecated (ggplot2 >= 3.4.0), so the plot is built with ggplot().
# The explicit geom_point() reproduces the point layer qplot() added by
# default for an x/y pair.
ggplot(mapping = aes(x = "MeanDifference", y = mean_difference)) +
  geom_point() +
  geom_col(width = .5, alpha = .5) +
  geom_hline(yintercept = 0) +  # reference line at "no difference"
  geom_point(aes(y = difference_scores), alpha = .25) +
  geom_errorbar(
    aes(ymin = lower_interval, ymax = upper_interval),
    width = .1
  )
Data simulation
# Simulate the sampling distribution of the mean under a null of 0.5:
# 1000 samples of 32 draws from a normal distribution whose SD is the average
# of the two observed phase SDs.
sample_sd <- (sd(baseline) + sd(test_phase)) / 2
# Preallocate the full result vector. The previous `length(1000)` evaluates
# to 1 (the length of the number 1000), so the vector was grown on every
# iteration instead of being sized up front.
simulated_means <- numeric(1000)
for (i in seq_len(1000)) {
  simulated_means[i] <- mean(rnorm(32, mean = .5, sd = sample_sd))
}
hist(simulated_means)
# Simulate the sampling distribution of the mean difference under a null of 0:
# 1000 samples of 32 draws from a normal distribution whose SD is the SD of
# the observed paired differences.
sample_sd <- sd(baseline - test_phase)
# Preallocate the full result vector. The previous `length(1000)` evaluates
# to 1 (the length of the number 1000), so the vector was grown on every
# iteration instead of being sized up front.
simulated_mean_difference <- numeric(1000)
for (i in seq_len(1000)) {
  simulated_mean_difference[i] <- mean(rnorm(32, mean = 0, sd = sample_sd))
}
hist(simulated_mean_difference)
# Two simulated conditions of 20 observations each, both drawn from the same
# normal distribution (mean 100, SD 25).
Condition_A <- rnorm(n = 20, mean = 100, sd = 25)
Condition_B <- rnorm(n = 20, mean = 100, sd = 25)
Conduct a paired-samples t-test on the sample data stored in the variables Condition_A and Condition_B. Report the results of the t-test and the mean difference between conditions.
# Paired t-test on the two simulated conditions.
# `var.equal` is dropped because t.test() ignores it when paired = TRUE
# (the test is computed on the pairwise differences).
t.test(Condition_A, Condition_B, paired = TRUE)
##
## Paired t-test
##
## data: Condition_A and Condition_B
## t = -0.133, df = 19, p-value = 0.8956
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -19.25654 16.95541
## sample estimates:
## mean of the differences
## -1.150564
Could also compute the mean difference with the code below.
# Element-wise difference between the conditions, then a one-sample t-test of
# those differences against 0; "mean of x" in the output is the mean difference.
diff_scores <- Condition_A - Condition_B
t.test(x = diff_scores, mu = 0)
##
## One Sample t-test
##
## data: diff_scores
## t = -0.133, df = 19, p-value = 0.8956
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -19.25654 16.95541
## sample estimates:
## mean of x
## -1.150564
The above code assumes no difference in the means between Condition A and Condition B. Both samples come from the same normal distribution. Change the mean for one of the distributions. Make the change large enough so that you find a significant p-value (p < 0.05) when you conduct the t-test on the new simulated data. Report the t-test and the means for each condition
# Re-draw the simulation with the second condition's population mean raised
# from 100 to 130 (same n = 20 and SD = 25), then report each sample mean.
Condition_A <- rnorm(n = 20, mean = 100, sd = 25)
Condition_BB <- rnorm(n = 20, mean = 130, sd = 25)
mean(x = Condition_A)
## [1] 91.59698
mean(x = Condition_BB)
## [1] 125.3685
# Paired t-test on the simulated conditions with different population means.
# `var.equal` is dropped because t.test() ignores it when paired = TRUE
# (the test is computed on the pairwise differences).
t.test(Condition_A, Condition_BB, paired = TRUE)
##
## Paired t-test
##
## data: Condition_A and Condition_BB
## t = -4.3815, df = 19, p-value = 0.000321
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -49.90419 -17.63890
## sample estimates:
## mean of the differences
## -33.77155
# Element-wise difference between the shifted conditions, then a one-sample
# t-test of those differences against 0.
diff_scores <- Condition_A - Condition_BB
t.test(x = diff_scores, mu = 0)
##
## One Sample t-test
##
## data: diff_scores
## t = -4.3815, df = 19, p-value = 0.000321
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -49.90419 -17.63890
## sample estimates:
## mean of x
## -33.77155
A t value gives a measure of confidence, such that a larger absolute t value gives greater evidence that the null hypothesis should be rejected. The t value is a property of your data. Instead of relying solely on the sample mean and standard error to gain insights into your data, you may use the t value, which expresses the difference between your sample mean and the population mean in units of the standard error of the mean (one-sample formula: t = (sample mean - population mean) / standard error of the mean). Thus, t values go beyond means and standard errors by combining them into a single standardized measure.
Using an alpha criterion of 0.05, any p value that is greater than 0.05 is not statistically significant. A p value of 0.25 tells us that the null distribution would produce the obtained t value or larger 25% of the time (assuming the obtained t value is positive).
The critical t value is associated with the alpha criterion. In a two-tailed test using an alpha of 0.05, a critical t value of 2.6 means that, when the null hypothesis is true, t values more extreme than ±2.6 occur 5% of the time. In other words, t values of 2.6 or greater will occur 2.5% of the time, and t values of -2.6 or smaller will occur 2.5% of the time.
A one-sample t test is used to estimate the likelihood that one sample came from a particular population. One-sample t tests use the known population parameters, or the estimated population parameters when the true parameters are unknown. A paired-sample t test is very similar to the one-sample t test, in that paired t tests calculate the differences between paired observations before computing a one-sample t test on the difference between the observations.