# Data: club.df ----
# Load the club survey data from the course website (requires network access).
# The file is tab-separated with a header row; text columns are kept as
# character vectors instead of being converted to factors.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
club.df <- read.table(
  "http://nathanieldphillips.com/wp-content/uploads/2015/12/club.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Question 1 ----
# Q1a: do women and men differ in the time they spend at a club?

# Side-by-side box plots of time for each gender.
boxplot(
  time ~ gender,
  data = club.df,
  main = "Distribution of club time",
  xlab = "Gender",
  ylab = "Time (min.)",
  ylim = c(0, 270)
)

# Mean time per gender.
aggregate(time ~ gender, data = club.df, FUN = mean)
## gender time
## 1 F 134.4167
## 2 M 136.7292

# Two-sample t-test of time by gender, using t.test()'s default settings.
q1.test <- t.test(time ~ gender, data = club.df)
q1.test$statistic
## t
## -0.3815224
q1.test$parameter
## df
## 297.5547
q1.test$p.value
## [1] 0.703088
# Conclusion: women and men do not differ significantly in the time they
# spend at clubs, t(297.55) = -0.38, p = 0.70.

# Q1b: the same test restricted to guests of the club "Blechnerei".
t.test(
  time ~ gender,
  data = club.df,
  subset = club == "Blechnerei"
)
##
## Welch Two Sample t-test
##
## data: time by gender
## t = 0.062752, df = 104.1, p-value = 0.9501
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -20.29240 21.61866
## sample estimates:
## mean in group F mean in group M
## 140.9180 140.2549
# Conclusion: the result does not change. Within Blechnerei there is still no
# significant gender difference in time spent, t(104.10) = 0.06, p = 0.95.
# Question 2 ----
# Q2a: do people who leave alone differ in the number of drinks they had?

# Box plots of drinks for each value of leavealone.
boxplot(drinks ~ leavealone,
        data = club.df,
        ylim = c(0, 10),
        ylab = "Number of Drinks",
        # NOTE(review): this label codes 0 as "left alone", while the
        # Question 6 bar plot reads the mean of leavealone as the share of
        # people leaving alone (1 = alone). Confirm the coding of leavealone.
        xlab = "Leaving alone yes (0) vs no (1)",
        main = "Distribution of Drinks")

# Mean number of drinks per leavealone group. The formula is passed
# positionally: since R 4.2.0 the first argument of aggregate()'s formula
# method is named `x`, so `formula = ...` no longer works.
aggregate(drinks ~ leavealone,
          data = club.df,
          FUN = mean)
## leavealone drinks
## 1 0 3.577465
## 2 1 4.117904

# Two-sample t-test of drinks by leavealone, using t.test()'s default settings.
q2.test <- t.test(drinks ~ leavealone,
                  data = club.df)
q2.test$statistic
## t
## -2.625326
q2.test$parameter
## df
## 121.1829
q2.test$p.value
## [1] 0.009772036
# Conclusion: the number of drinks differs significantly between the two
# leavealone groups, t(121.18) = -2.63, p < 0.01.

# Q2b: the same hypothesis tested on women only.
club.df2 <- subset(club.df, gender == "F")
# leavealone must remain the grouping variable. Passing drinks as `x` and
# leavealone as `y` would instead compare the mean number of drinks with the
# mean of the 0/1 indicator, which does not address the question.
q2.etest <- t.test(drinks ~ leavealone,
                   data = club.df2,
                   alternative = "two.sided")
q2.etest$statistic
q2.etest$parameter
q2.etest$p.value
# NOTE(review): the output previously recorded here (t = 22.91244,
# df = 177.983, p = 5.629867e-55) and the conclusion drawn from it
# ("same conclusion for women, t(177.98) = 22.912, p < 0.01") came from the
# x/y call described above. Re-run this section and restate the conclusion
# from the new values.
# Question 3 ----
# Format a hypothesis-test result as a one-line APA-style report.
#
# Arguments:
#   test.object  A test result exposing $statistic (a named number),
#                $parameter, $p.value and $method, plus $estimate for the
#                correlation / t-test branches and $observed for the
#                chi-squared branch (e.g. what t.test(), cor.test() or
#                chisq.test() return).
#   tails        1 or 2. With tails = 1 the stored p-value is halved before
#                display (this presumes test.object holds a two-sided
#                p-value; the function does not check it).
#   sig.digits   Number of digits handed to round() for every reported value.
#   p.lb         Reporting floor: p-values below it are shown as "p < p.lb".
#
# Returns:
#   A character string such as
#   "mean difference = 2.31, t(297.55) = -0.38, p = 0.7 (2-tailed)".
#   For a method that matches none of the branches below, the estimate part is
#   omitted and only the statistic, parameter and p-value are reported.
apa <- function(test.object, tails = 2, sig.digits = 2, p.lb = .01) {
  method <- test.object$method

  # First character of the statistic's name, e.g. "t" or "X" (for "X-squared").
  statistic.id <- substr(names(test.object$statistic), start = 1, stop = 1)

  p.value <- test.object$p.value
  if (tails == 1) {
    p.value <- p.value / 2
  }

  # if/else so that p.value == p.lb also gets a label; two strict comparisons
  # would leave p.display undefined at the boundary.
  if (p.value < p.lb) {
    p.display <- paste0("p < ", p.lb, " (", tails, "-tailed)")
  } else {
    p.display <- paste0("p = ", round(p.value, sig.digits),
                        " (", tails, "-tailed)")
  }

  # Defaults cover methods that match no branch (e.g. "Paired t-test").
  estimate.display <- ""
  add.par <- ""

  if (grepl("product-moment", method)) {
    estimate.display <- paste0("r = ",
                               round(test.object$estimate, sig.digits), ", ")
  }
  if (grepl("Chi", method)) {
    estimate.display <- ""
    # The total count is reported inside the parentheses, after the df.
    add.par <- paste0(", N = ", sum(test.object$observed))
  }
  if (grepl("One Sample t-test", method)) {
    estimate.display <- paste0("mean = ",
                               round(test.object$estimate, sig.digits), ", ")
  }
  if (grepl("Two Sample t-test", method)) {
    # Reported as second estimate minus first estimate.
    mean.difference <- test.object$estimate[2] - test.object$estimate[1]
    estimate.display <- paste0("mean difference = ",
                               round(mean.difference, sig.digits), ", ")
  }

  paste0(
    estimate.display,
    statistic.id,
    "(",
    round(test.object$parameter, sig.digits),
    add.par,
    ") = ",
    round(test.object$statistic, sig.digits),
    ", ",
    p.display
  )
}
# Cross-check: let apa() regenerate the reports for the Question 1 and
# Question 2 tests.
apa(test.object = q1.test)
## [1] "mean difference = 2.31, t(297.55) = -0.38, p = 0.7 (2-tailed)"
apa(test.object = q2.test)
## [1] "mean difference = 0.54, t(121.18) = -2.63, p < 0.01 (2-tailed)"
# The t, df and p values produced by apa() agree with the conclusions written
# by hand for Questions 1 and 2.
# Question 4 ----
# Q4a: is the number of drinks related to the time spent at the club?

# Scatter plot of time against number of drinks.
plot(x = club.df$drinks,
     y = club.df$time,
     xlab = "Number of drinks",
     ylab = "Time (min.)",
     xlim = c(0, 10),
     ylim = c(0, 270),
     main = "Relationship between Drinks and Time")

# Mean time for each observed number of drinks. The formula is passed
# positionally: since R 4.2.0 the first argument of aggregate()'s formula
# method is named `x`, so `formula = ...` no longer works.
aggregate(time ~ drinks,
          data = club.df,
          FUN = mean)
## drinks time
## 1 0 85.40000
## 2 1 115.84615
## 3 2 97.03226
## 4 3 129.49123
## 5 4 136.85542
## 6 5 144.95522
## 7 6 155.31034
## 8 7 174.63636
## 9 8 194.00000
## 10 9 258.00000

# Correlation test between time and drinks over all guests.
q4.test <- cor.test(x = club.df$time,
                    y = club.df$drinks)
q4.test$estimate
## cor
## 0.3617512
q4.test$parameter
## df
## 298
q4.test$p.value
## [1] 1.049536e-10
# Conclusion: drinks and time spent at the club are significantly positively
# correlated, r(298) = 0.36, p < 0.01.

# Q4b: the same correlation for women who went to Blechnerei only.
femclub.df <- subset(club.df, gender == "F" & club == "Blechnerei")
cor.test(x = femclub.df$time,
         y = femclub.df$drinks)
##
## Pearson's product-moment correlation
##
## data: femclub.df$time and femclub.df$drinks
## t = 2.7597, df = 59, p-value = 0.007695
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.09433171 0.54365162
## sample estimates:
## cor
## 0.3381205
# Conclusion: the positive correlation between drinks and time also holds for
# women who went to Blechnerei, r(59) = 0.34, p < 0.01.
# Question 5 ----
# Q5a: does the gender mix differ between clubs?

# Numeric indicator for gender: 1 for "M", 0 for "F", NA for anything else.
# Built with a single lookup instead of two partial assignments into a column
# that does not exist yet.
club.df$gender2.log <- unname(c(F = 0, M = 1)[as.character(club.df$gender)])

# Mean of the indicator per club, i.e. the proportion of men. Computed once
# and reused for both the printed table and the plot. The formula is passed
# positionally: since R 4.2.0 the first argument of aggregate()'s formula
# method is named `x`, so `formula = ...` no longer works.
agg.result <- aggregate(gender2.log ~ club,
                        data = club.df,
                        FUN = mean)
agg.result
## club gender2.log
## 1 Barrys 0.3700000
## 2 Blechnerei 0.4553571
## 3 Kantine 0.6363636

# Bar heights are proportions on a 0-1 axis. `names.arg` is spelled out in
# full; `names` only worked through partial argument matching.
barplot(height = agg.result$gender2.log,
        names.arg = agg.result$club,
        ylim = c(0, 1),
        col = "royalblue3",
        ylab = "% Males",
        xlab = "Clubs")

# Chi-squared test of gender against club.
q5.test <- chisq.test(x = club.df$gender,
                      y = club.df$club)
q5.test$statistic
## X-squared
## 13.7403
q5.test$parameter
## df
## 2
q5.test$p.value
## [1] 0.001038323
# Conclusion: club and gender are significantly related,
# X2(2) = 13.74, p < 0.01.

# Q5b: the same test for Barrys and Kantine only.
club.df3 <- subset(club.df, club %in% c("Barrys", "Kantine"))
chisq.test(x = club.df3$gender,
           y = club.df3$club)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: club.df3$gender and club.df3$club
## X-squared = 12.241, df = 1, p-value = 0.0004674
# Conclusion: the gender mix also differs significantly between just Barrys
# and Kantine, X2(1) = 12.24, p < 0.01.
# Question 6 ----
# Q6a: is leaving the club alone related to gender?

# Mean of leavealone per gender. Computed once and reused for both the printed
# table and the plot. The formula is passed positionally: since R 4.2.0 the
# first argument of aggregate()'s formula method is named `x`, so
# `formula = ...` no longer works.
agg.result1 <- aggregate(leavealone ~ gender,
                         data = club.df,
                         FUN = mean)
agg.result1
## gender leavealone
## 1 F 0.7820513
## 2 M 0.7430556

# Bar heights are proportions on a 0-1 axis. `names.arg` is spelled out in
# full; `names` only worked through partial argument matching.
barplot(height = agg.result1$leavealone,
        names.arg = agg.result1$gender,
        ylim = c(0, 1),
        col = "royalblue3",
        ylab = "% People leaving alone",
        xlab = "Gender")

# Chi-squared test of gender against leavealone.
q6.test <- chisq.test(x = club.df$gender,
                      y = club.df$leavealone)
q6.test$statistic
## X-squared
## 0.4329259
q6.test$parameter
## df
## 1
q6.test$p.value
## [1] 0.5105567
# Conclusion: leaving the club alone is not significantly related to gender,
# X2(1) = 0.43, p = 0.51.

# Q6b: the same test for people who stayed longer than 60 minutes.
club.df60 <- subset(club.df, time > 60)
chisq.test(x = club.df60$gender,
           y = club.df60$leavealone)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: club.df60$gender and club.df60$leavealone
## X-squared = 0.88492, df = 1, p-value = 0.3469
# Conclusion: the result holds. Among people who stayed more than 60 minutes
# there is still no significant relationship between leaving alone and gender,
# X2(1) = 0.88, p = 0.35.