# Load the tab-separated club data (first row is the header) from the URL
# below. Character columns are kept as character vectors, not factors.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
club.df <- read.table(
  "http://nathanieldphillips.com/wp-content/uploads/2015/12/club.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)
#Create a plot (e.g. boxplot or beanplot) showing the distribution of club times for males and females
# One box per gender level, showing the distribution of `time`.
boxplot(time ~ gender, data = club.df,
        main = "distribution of gender and time",
        xlab = "gender", ylab = "time")
#Using grouped aggregation (e.g.; aggregate or dplyr), calculate the mean number of minutes that men and women stayed at the club(s)
# Mean of `time` within each `gender` group.
aggregate(time ~ gender, data = club.df, FUN = mean)
## gender time
## 1 F 134.4167
## 2 M 136.7292
#Conduct a two-tailed t-test testing whether or not there is a significant difference in the amount of time women and men spend at clubs. Save the result as an object called q1.test
# Two-sided two-sample t-test of `time` by `gender`, restricted to rows whose
# gender is "F" or "M". The result is stored and then printed.
q1.test <- t.test(
  formula = time ~ gender,
  data = club.df,
  subset = gender %in% c("F", "M"),
  alternative = "two.sided"
)
q1.test
##
## Welch Two Sample t-test
##
## data: time by gender
## t = -0.38152, df = 297.55, p-value = 0.7031
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -14.240836 9.615836
## sample estimates:
## mean in group F mean in group M
## 134.4167 136.7292
#Write your conclusion in APA format. Be sure to address my friend’s claim that women are werewolves.
#t-test: t(297.55) = -0.38152, p = 0.7031.
#Do the results change if you only look at people who were at the Blechnerei? Using only the Blechnerei data, repeat the test and write your conclusion in APA format (Hint: Use subset()!)
# Same test as before, but only for rows where `club` is "Blechnerei".
# This reassigns q1.test, so the earlier all-clubs result is no longer stored.
q1.test <- t.test(
  formula = time ~ gender,
  data = club.df,
  subset = gender %in% c("F", "M") & club == "Blechnerei",
  alternative = "two.sided"
)
q1.test
##
## Welch Two Sample t-test
##
## data: time by gender
## t = 0.062752, df = 104.1, p-value = 0.9501
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -20.29240 21.61866
## sample estimates:
## mean in group F mean in group M
## 140.9180 140.2549
#t-test: t(104.1) = 0.062752, p = 0.9501.
#Create a plot (e.g. boxplot or beanplot) showing the distribution of drinks for people that did and did not leave alone
# One box per `leavealone` value (0 / 1), showing the distribution of `drinks`.
# The y-axis label previously read "acnout of drinks" (typo).
boxplot(drinks ~ leavealone,
        data = club.df,
        ylab = "amount of drinks",
        xlab = "leave alone",
        main = "distribution of drinks and (not) leave alone")
#Using grouped aggregation (e.g.; aggregate or dplyr), calculate the mean number of drinks for people that did and did not leave alone
# Mean of `drinks` within each `leavealone` group.
aggregate(drinks ~ leavealone, data = club.df, FUN = mean)
## leavealone drinks
## 1 0 3.577465
## 2 1 4.117904
#Conduct a two-tailed t-test testing whether or not there is a significant difference in the amount of drinks people had when they went home alone versus not alone. Save the result as an object called q2.test
# Two-sided two-sample t-test of `drinks` by `leavealone`, restricted to rows
# whose leavealone value is 0 or 1. The result is stored and then printed.
q2.test <- t.test(
  formula = drinks ~ leavealone,
  data = club.df,
  subset = leavealone %in% c(0, 1),
  alternative = "two.sided"
)
q2.test
##
## Welch Two Sample t-test
##
## data: drinks by leavealone
## t = -2.6253, df = 121.18, p-value = 0.009772
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.9479793 -0.1328990
## sample estimates:
## mean in group 0 mean in group 1
## 3.577465 4.117904
#Write your conclusion in APA format.
#t-test: t(121.18) = -2.6253, p = 0.009772
#Do the results change if you ignore Males and only test Females? Using only the Female data, repeat the test and write your conclusion in APA format (Hint: Use subset()!)
# Same test as before, but only for rows where `gender` is "F".
# This reassigns q2.test, so the earlier all-genders result is no longer stored.
q2.test <- t.test(
  formula = drinks ~ leavealone,
  data = club.df,
  subset = leavealone %in% c(0, 1) & gender == "F",
  alternative = "two.sided"
)
q2.test
##
## Welch Two Sample t-test
##
## data: drinks by leavealone
## t = -1.3791, df = 53.466, p-value = 0.1736
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.9844944 0.1821801
## sample estimates:
## mean in group 0 mean in group 1
## 3.352941 3.754098
# Conclusion in APA:
#t-test: t(53.466) = -1.3791, p = 0.1736
#APA style conclusion as an output
#' Format a hypothesis-test result as a one-line APA-style string
#'
#' Builds a string of the form
#' `"<estimate>, <stat>(<parameter>) = <statistic>, p ... (<tails>-tailed)"`
#' from the `statistic`, `parameter`, `p.value`, `method` and `estimate`
#' elements of `test.object`.
#'
#' @param test.object A list with the elements named above (such as the objects
#'   returned by `t.test()` and `cor.test()` elsewhere in this script). For
#'   methods containing "Chi", `observed` is also read to report N.
#' @param tails Number of tails to report; when `1`, the stored p-value is
#'   halved before display.
#' @param sig.digits Number of decimal places passed to `round()`.
#' @param p.lb Lower bound for p: values below it are shown as `"p < p.lb"`,
#'   values at or above it are shown rounded.
#' @return A length-one character vector.
apa <- function(test.object, tails = 2, sig.digits = 2, p.lb = .01) {
  # First letter of the statistic's name, e.g. "t" or "X".
  statistic.id <- substr(names(test.object$statistic), start = 1, stop = 1)

  p.value <- test.object$p.value
  if (tails == 1) {
    p.value <- p.value / 2
  }

  # if/else (rather than two strict comparisons) so that p.value == p.lb is
  # also covered; it is reported as an exact value.
  if (p.value < p.lb) {
    p.display <- paste0("p < ", p.lb, " (", tails, "-tailed)")
  } else {
    p.display <- paste0("p = ", round(p.value, sig.digits),
                        " (", tails, "-tailed)")
  }

  # Defaults used when the method matches none of the cases below (for example
  # a paired t-test): no estimate prefix and nothing extra in the parentheses.
  estimate.display <- ""
  add.par <- ""

  if (grepl("product-moment", test.object$method)) {
    estimate.display <- paste0(
      "r = ", round(test.object$estimate, sig.digits), ", "
    )
  }
  if (grepl("Chi", test.object$method)) {
    estimate.display <- ""
    add.par <- paste0(", N = ", sum(test.object$observed))
  }
  if (grepl("One Sample t-test", test.object$method)) {
    estimate.display <- paste0(
      "mean = ", round(test.object$estimate, sig.digits), ", "
    )
  }
  if (grepl("Two Sample t-test", test.object$method)) {
    # Reported as second estimate minus first estimate.
    estimate.display <- paste0(
      "mean difference = ",
      round(test.object$estimate[2] - test.object$estimate[1], sig.digits),
      ", "
    )
  }

  paste0(
    estimate.display,
    statistic.id,
    "(",
    round(test.object$parameter, sig.digits),
    add.par,
    ") = ",
    round(test.object$statistic, sig.digits),
    ", ",
    p.display
  )
}
# q1.test was reassigned by the Blechnerei-only t-test, so this formats that
# result (df = 104.1), not the all-clubs test. The reported mean difference is
# the second group mean minus the first (M - F), while t is the statistic
# stored in the test object, hence the opposite signs.
apa(q1.test)
## [1] "mean difference = -0.66, t(104.1) = 0.06, p = 0.95 (2-tailed)"
# q2.test was likewise reassigned; this formats the females-only test
# (df = 53.47).
apa(q2.test)
## [1] "mean difference = 0.4, t(53.47) = -1.38, p = 0.17 (2-tailed)"
#Create a plot (e.g. scatterplot) showing the relationship between drinks and time
# Scatterplot of time (y-axis) against drinks (x-axis).
plot(
  x = club.df$drinks, y = club.df$time,
  main = "drinks and time",
  xlab = "Drinks", ylab = "time"
)
#Using grouped aggregation (e.g.; aggregate or dplyr), calculate the mean number of minutes that people stay at the club for each drink amount.
# Mean of `time` for each distinct value of `drinks`.
aggregate(time ~ drinks, data = club.df, FUN = mean)
## drinks time
## 1 0 85.40000
## 2 1 115.84615
## 3 2 97.03226
## 4 3 129.49123
## 5 4 136.85542
## 6 5 144.95522
## 7 6 155.31034
## 8 7 174.63636
## 9 8 194.00000
## 10 9 258.00000
#Is the difference significant? Conduct a correlation test and save the result as an object called q4.test
# Correlation test between drinks and time over all rows of club.df; the
# result is stored and then printed.
q4.test <- cor.test(
  x = club.df$drinks,
  y = club.df$time
)
q4.test
##
## Pearson's product-moment correlation
##
## data: club.df$drinks and club.df$time
## t = 6.6984, df = 298, p-value = 1.05e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2591255 0.4562998
## sample estimates:
## cor
## 0.3617512
#Write your result in APA format. r(298) = 0.3617512, p = 1.05e-10
#Repeat the test but only for females at Blechnerei. Do you get the same conclusion? Write the results of this test in APA format
# Correlation between drinks and time for females at Blechnerei only.
# cor.test(x = , y = ) does not apply a `subset` argument, so the earlier call
# tested the whole data set. The formula interface is used instead, which
# filters the rows of `data` with `subset` before testing.
q4.test <- cor.test(
  ~ drinks + time,
  data = club.df,
  subset = gender == "F" & club == "Blechnerei"
)
q4.test
# NOTE: re-run this block to record the printed output and the conclusion.
# The figures previously pasted here (t = 6.6984, df = 298,
# p-value = 1.05e-10, cor = 0.3617512) were identical to the full-sample
# result above because the subset had not been applied, so they do not
# describe the females-at-Blechnerei subgroup.
#Conclusion in APA format
#r(<df>) = <cor>, p = <p-value>   (fill in from the re-run output)