# Load the tab-separated club data set (first row holds the column names),
# keeping text columns as character vectors rather than factors.
club.df <- read.table(
  "http://nathanieldphillips.com/wp-content/uploads/2015/12/club.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)
Q1 A
# Distribution of club time, one box per gender
boxplot(time ~ gender, data = club.df, xlab = "gender", ylab = "club time")
Q1 B
# Mean club time for each gender
aggregate(time ~ gender, data = club.df, FUN = mean)
## gender time
## 1 F 134.4167
## 2 M 136.7292
Q1 C
# Pull out the club times of women and of men (%in% treats a missing gender
# as "no match", just like subset() does), then compare the two groups with
# a two-sided t-test.
time.women <- club.df$time[club.df$gender %in% "F"]
time.men <- club.df$time[club.df$gender %in% "M"]
test.result <- t.test(x = time.women, y = time.men, alternative = "two.sided")
print(test.result)
##
## Welch Two Sample t-test
##
## data: time.women and time.men
## t = -0.38152, df = 297.55, p-value = 0.7031
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -14.240836 9.615836
## sample estimates:
## mean of x mean of y
## 134.4167 136.7292
or
# Same comparison through the formula interface, using only rows whose
# gender is coded "F" or "M".
q1.test <- t.test(
  time ~ gender,
  data = club.df,
  subset = gender %in% c("F", "M"),
  alternative = "two.sided"
)
print(q1.test)
##
## Welch Two Sample t-test
##
## data: time by gender
## t = -0.38152, df = 297.55, p-value = 0.7031
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -14.240836 9.615836
## sample estimates:
## mean in group F mean in group M
## 134.4167 136.7292
Q1 D t-test: t(297.55) = -0.38, p = 0.70 # there is no significant difference in the amount of time women and men spend at clubs
Q1 E
# Time by gender again, this time only for visits to the club "Blechnerei"
test.result <- t.test(
  time ~ gender,
  data = club.df,
  subset = gender %in% c("F", "M") & club == "Blechnerei",
  alternative = "two.sided"
)
print(test.result)
##
## Welch Two Sample t-test
##
## data: time by gender
## t = 0.062752, df = 104.1, p-value = 0.9501
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -20.29240 21.61866
## sample estimates:
## mean in group F mean in group M
## 140.9180 140.2549
t-test: t(104.10) = 0.06, p = 0.95 # there is no significant difference in the amount of time women and men spend at Blechnerei
Q2 A
# Distribution of drinks, one box per value of leavealone
boxplot(drinks ~ leavealone, data = club.df, xlab = "leavealone", ylab = "drinks")
Q2 B
# Mean number of drinks for each value of leavealone
aggregate(drinks ~ leavealone, data = club.df, FUN = mean)
## leavealone drinks
## 1 0 3.577465
## 2 1 4.117904
Q2 C
# Two-sided t-test of drinks between the two leavealone groups (0 and 1)
q2.test <- t.test(
  drinks ~ leavealone,
  data = club.df,
  subset = leavealone %in% c("0", "1"),
  alternative = "two.sided"
)
print(q2.test)
##
## Welch Two Sample t-test
##
## data: drinks by leavealone
## t = -2.6253, df = 121.18, p-value = 0.009772
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.9479793 -0.1328990
## sample estimates:
## mean in group 0 mean in group 1
## 3.577465 4.117904
Q2 D t-test: t(121.18) = -2.63, p < 0.01 # there is a significant difference in the number of drinks people had when they went home alone versus not alone.
Q2 E
# Drinks by leavealone once more, restricted to women.
# Note: this overwrites the q2.test object created for the full sample.
q2.test <- t.test(
  drinks ~ leavealone,
  data = club.df,
  subset = leavealone %in% c("0", "1") & gender == "F",
  alternative = "two.sided"
)
print(q2.test)
##
## Welch Two Sample t-test
##
## data: drinks by leavealone
## t = -1.3791, df = 53.466, p-value = 0.1736
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.9844944 0.1821801
## sample estimates:
## mean in group 0 mean in group 1
## 3.352941 3.754098
t-test: t(53.47) = -1.38, p = 0.17 # there is no significant difference in the number of drinks women had when they went home alone versus not alone.
Q3 A
# Format the result of a hypothesis test as a one-line APA-style summary.
#
# Arguments:
#   test.object  A test result list with the elements `statistic` (named),
#                `parameter`, `p.value` and `method`; `estimate` is read for
#                correlation and t-tests, `observed` for chi-square tests.
#   tails        Number of tails to report. When 1, the p-value stored in
#                `test.object` is halved before it is displayed.
#   sig.digits   Number of decimal places used when rounding displayed values.
#   p.lb         Lower bound for the p-value: anything smaller is displayed
#                as "p < p.lb" instead of its rounded value.
#
# Returns:
#   A single character string such as
#   "mean difference = 2.31, t(297.55) = -0.38, p = 0.7 (2-tailed)".
apa <- function(test.object, tails = 2, sig.digits = 2, p.lb = .01) {
  # The statistic is labelled by the first letter of its name ("t", "X", ...).
  statistic.id <- substr(names(test.object$statistic), start = 1, stop = 1)

  p.value <- test.object$p.value
  if (tails == 1) {
    p.value <- p.value / 2
  }

  # if/else (rather than two separate tests) so that p.value == p.lb also
  # yields a display string instead of leaving p.display undefined.
  if (p.value < p.lb) {
    p.display <- paste0("p < ", p.lb, " (", tails, "-tailed)")
  } else {
    p.display <- paste0(
      "p = ", round(p.value, sig.digits), " (", tails, "-tailed)"
    )
  }

  # Defaults used for any test type not recognised below, so that such tests
  # still get a "statistic(parameter) = value, p ..." string rather than an
  # "object not found" error.
  estimate.display <- ""
  add.par <- ""

  if (grepl("product-moment", test.object$method)) {
    estimate.display <- paste0(
      "r = ", round(test.object$estimate, sig.digits), ", "
    )
  }
  if (grepl("Chi", test.object$method)) {
    estimate.display <- ""
    add.par <- paste0(", N = ", sum(test.object$observed))
  }
  if (grepl("One Sample t-test", test.object$method)) {
    estimate.display <- paste0(
      "mean = ", round(test.object$estimate, sig.digits), ", "
    )
  }
  if (grepl("Two Sample t-test", test.object$method)) {
    # Difference is second estimate minus first estimate.
    mean.difference <- test.object$estimate[2] - test.object$estimate[1]
    estimate.display <- paste0(
      "mean difference = ", round(mean.difference, sig.digits), ", "
    )
  }

  paste0(
    estimate.display,
    statistic.id,
    "(",
    round(test.object$parameter, sig.digits),
    add.par,
    ") = ",
    round(test.object$statistic, sig.digits),
    ", ",
    p.display
  )
}
Q3 B
# APA summaries of the two stored t-test results
print(apa(q1.test))
## [1] "mean difference = 2.31, t(297.55) = -0.38, p = 0.7 (2-tailed)"
print(apa(q2.test))
## [1] "mean difference = 0.4, t(53.47) = -1.38, p = 0.17 (2-tailed)"
Q4 A
# Scatter plot of time (y) against drinks (x)
with(
  club.df,
  plot(
    x = drinks,
    y = time,
    main = "the relationship between drinks and time",
    xlab = "drinks",
    ylab = "time"
  )
)
Q4 B
# Mean number of drinks for every distinct value of time
aggregate(drinks ~ time, data = club.df, FUN = mean)
## time drinks
## 1 -7 0.000000
## 2 3 5.000000
## 3 6 2.000000
## 4 12 4.000000
## 5 17 2.000000
## 6 23 4.000000
## 7 33 2.000000
## 8 35 4.000000
## 9 39 4.000000
## 10 40 3.500000
## 11 43 4.500000
## 12 44 3.000000
## 13 45 4.000000
## 14 50 2.000000
## 15 51 1.000000
## 16 52 2.500000
## 17 53 1.000000
## 18 56 1.500000
## 19 59 3.000000
## 20 63 3.500000
## 21 64 3.000000
## 22 65 5.000000
## 23 66 5.000000
## 24 67 4.000000
## 25 68 4.000000
## 26 69 2.000000
## 27 71 2.000000
## 28 72 3.400000
## 29 74 2.000000
## 30 77 3.000000
## 31 79 1.000000
## 32 80 5.000000
## 33 83 3.333333
## 34 85 3.666667
## 35 86 0.000000
## 36 87 2.000000
## 37 88 4.000000
## 38 89 3.666667
## 39 90 3.666667
## 40 91 0.500000
## 41 92 3.000000
## 42 93 5.000000
## 43 94 4.000000
## 44 97 5.000000
## 45 100 2.500000
## 46 101 2.500000
## 47 102 4.333333
## 48 103 2.000000
## 49 104 5.000000
## 50 105 4.000000
## 51 106 4.500000
## 52 108 3.000000
## 53 109 4.000000
## 54 110 5.000000
## 55 111 4.000000
## 56 112 4.000000
## 57 113 3.833333
## 58 114 4.666667
## 59 116 4.500000
## 60 117 3.000000
## 61 118 2.500000
## 62 119 5.500000
## 63 120 3.333333
## 64 121 2.000000
## 65 122 4.333333
## 66 123 4.000000
## 67 124 3.666667
## 68 125 4.250000
## 69 127 5.000000
## 70 129 5.000000
## 71 130 3.333333
## 72 131 4.500000
## 73 132 4.250000
## 74 133 6.000000
## 75 134 4.500000
## 76 135 4.000000
## 77 136 3.500000
## 78 137 5.000000
## 79 138 3.000000
## 80 141 5.333333
## 81 143 3.500000
## 82 145 5.000000
## 83 146 4.000000
## 84 147 4.000000
## 85 148 3.250000
## 86 149 6.000000
## 87 150 5.000000
## 88 151 3.600000
## 89 152 4.500000
## 90 153 4.000000
## 91 154 5.000000
## 92 155 5.000000
## 93 156 3.000000
## 94 157 4.000000
## 95 158 3.800000
## 96 159 6.000000
## 97 160 5.000000
## 98 161 3.666667
## 99 162 4.000000
## 100 164 5.000000
## 101 166 2.500000
## 102 167 4.250000
## 103 168 4.000000
## 104 169 2.000000
## 105 171 5.000000
## 106 172 1.500000
## 107 173 4.500000
## 108 174 4.000000
## 109 175 4.000000
## 110 176 5.000000
## 111 177 5.000000
## 112 178 3.500000
## 113 179 3.333333
## 114 180 6.000000
## 115 181 4.333333
## 116 183 3.500000
## 117 185 3.500000
## 118 186 4.000000
## 119 188 0.000000
## 120 189 5.000000
## 121 190 5.000000
## 122 191 5.000000
## 123 192 5.000000
## 124 193 5.000000
## 125 194 4.000000
## 126 195 4.000000
## 127 197 3.000000
## 128 198 4.000000
## 129 199 5.000000
## 130 201 7.000000
## 131 202 5.000000
## 132 204 8.000000
## 133 208 5.000000
## 134 212 6.000000
## 135 214 5.000000
## 136 215 7.000000
## 137 216 4.000000
## 138 217 6.000000
## 139 218 6.000000
## 140 219 4.000000
## 141 220 5.000000
## 142 221 5.000000
## 143 222 4.000000
## 144 226 5.000000
## 145 228 3.000000
## 146 232 7.000000
## 147 240 5.000000
## 148 245 8.000000
## 149 246 4.000000
## 150 249 5.500000
## 151 252 4.000000
## 152 256 4.000000
## 153 258 9.000000
## 154 268 3.000000
Q4 C
# Two-sided Pearson correlation test between drinks and time
q4.test <- cor.test(
  x = club.df$drinks,
  y = club.df$time,
  alternative = "two.sided",
  method = "pearson"
)
print(q4.test)
##
## Pearson's product-moment correlation
##
## data: club.df$drinks and club.df$time
## t = 6.6984, df = 298, p-value = 1.05e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2591255 0.4562998
## sample estimates:
## cor
## 0.3617512
Q4 D correlation test: r = 0.36, t(298) = 6.70, p < 0.01 # there is a significant, moderate positive correlation between the time people stay at the club and the number of drinks they have
Q4 E
```