library(yarrr)
## Loading required package: jpeg
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
##
## Die folgenden Objekte sind maskiert von 'package:stats':
##
## filter, lag
##
## Die folgenden Objekte sind maskiert von 'package:base':
##
## intersect, setdiff, setequal, union
# Load the club data set from the course website. The file is tab-separated
# with a header row; character columns are kept as strings, not factors.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
club.df <- read.table(
  "http://nathanieldphillips.com/wp-content/uploads/2015/12/club.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)
Question 1
# Side-by-side boxplots of the `time` column, one box per gender.
boxplot(
  time ~ gender,
  data = club.df,
  main = "Distribution of club times for males and females",
  xlab = "Gender",
  ylab = "Time"
)

# Mean of `time` within each gender.
# The formula is passed positionally: the first argument of the formula
# method is called `formula` before R 4.2 and `x` from R 4.2 on, so naming it
# is not portable across R versions.
# `na.rm = TRUE` is forwarded to mean().
aggregate(
  time ~ gender,
  data = club.df,
  FUN = mean,
  na.rm = TRUE
)
## gender time
## 1 F 134.4167
## 2 M 136.7292
# Two-sided two-sample t-test of `time` between the gender groups.
# The result is kept in q1.test so it can be reported later.
q1.test <- t.test(time ~ gender, data = club.df, alternative = "two.sided")
t-test: t(298) = -0.38, p = .70
Women stay in clubs about as long as men do; the difference is not significant. Women are probably vampires.
# Same gender comparison of `time`, restricted to rows where the club is
# "Blechnerei". The result is printed, not stored.
t.test(
  time ~ gender,
  data = club.df,
  subset = club == "Blechnerei",
  alternative = "two.sided"
)
##
## Welch Two Sample t-test
##
## data: time by gender
## t = 0.062752, df = 104.1, p-value = 0.9501
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -20.29240 21.61866
## sample estimates:
## mean in group F mean in group M
## 140.9180 140.2549
t-test: t(104) = 0.06, p = .95
The same holds within Blechnerei: women and men stay equally long, so the women in Blechnerei are also vampires.
Question 2
Do people who did not leave alone tend to drink more or less than people who did leave alone?
# Boxplots of `drinks`, one box per value of `leavealone` (0 and 1).
# NOTE(review): the labels below map leavealone == 0 to "alone" and
# leavealone == 1 to "not alone". A column named `leavealone` would usually
# code 1 as "left alone" -- confirm against the data description.
boxplot(
  drinks ~ leavealone,
  data = club.df,
  names = c("alone", "not alone"),
  main = "Do drinks help to hook up?",
  xlab = "Left the club alone?",
  ylab = "Number of Drinks"
)

# Mean of `drinks` within each value of `leavealone`.
# The formula is passed positionally: the first argument of the formula
# method is called `formula` before R 4.2 and `x` from R 4.2 on, so naming it
# is not portable across R versions.
# `na.rm = TRUE` is forwarded to mean().
aggregate(
  drinks ~ leavealone,
  data = club.df,
  FUN = mean,
  na.rm = TRUE
)
## leavealone drinks
## 1 0 3.577465
## 2 1 4.117904
# Two-sided two-sample t-test of `drinks` between the two `leavealone` groups.
# The result is kept in q2.test so it can be reported later.
q2.test <- t.test(drinks ~ leavealone, data = club.df, alternative = "two.sided")
APA Format for t-test
t(121) = -2.63, p = .01
People who drink more appear to have a better chance of leaving the club with someone.
# Same comparison of `drinks` by `leavealone`, restricted to rows where
# gender is "F". The result is printed, not stored.
t.test(
  drinks ~ leavealone,
  data = club.df,
  subset = gender == "F",
  alternative = "two.sided"
)
##
## Welch Two Sample t-test
##
## data: drinks by leavealone
## t = -1.3791, df = 53.466, p-value = 0.1736
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.9844944 0.1821801
## sample estimates:
## mean in group 0 mean in group 1
## 3.352941 3.754098
t(53) = -1.38, p = .17
Among women, the number of drinks does not differ significantly between those who left alone and those who did not.
Question 3
# Print a one-line summary string of the stored gender/time t-test (q1.test).
# apa() is presumably provided by the yarrr package loaded at the top.
apa(q1.test)
## [1] "mean difference = 2.31, t(297.55) = -0.38, p = 0.7 (2-tailed)"
# Same one-line summary for the stored drinks/leavealone t-test (q2.test).
apa(q2.test)
## [1] "mean difference = 0.54, t(121.18) = -2.63, p < 0.01 (2-tailed)"
Question 4: Is there a relationship between the number of drinks a person has and how long they stay at the club?
# Scatter plot of `drinks` (y) against `time` (x).
# Points are filled circles in semi-transparent gray, so overlapping
# observations show up darker.
plot(
  x = club.df$time,
  y = club.df$drinks,
  main = "Relationship between time and drinks",
  xlab = "How long people stay at the club",
  ylab = "Amount of drinks",  # fixed typo in the axis label ("Ammount")
  pch = 16,
  col = gray(level = .5, alpha = .5)
)

# Mean of `time` for each distinct number of drinks.
# The formula is passed positionally: the first argument of the formula
# method is called `formula` before R 4.2 and `x` from R 4.2 on, so naming it
# is not portable across R versions.
# `na.rm = TRUE` is forwarded to mean().
aggregate(
  time ~ drinks,
  data = club.df,
  FUN = mean,
  na.rm = TRUE
)
## drinks time
## 1 0 85.40000
## 2 1 115.84615
## 3 2 97.03226
## 4 3 129.49123
## 5 4 136.85542
## 6 5 144.95522
## 7 6 155.31034
## 8 7 174.63636
## 9 8 194.00000
## 10 9 258.00000
# Correlation test between `time` and `drinks` over all rows, followed by a
# one-line summary of the result.
q4.test <- cor.test(club.df$time, club.df$drinks)
apa(q4.test)
## [1] "r = 0.36, t(298) = 6.7, p < 0.01 (2-tailed)"
# Same time/drinks correlation test, restricted to rows where gender is "F"
# and the club is "Blechnerei".
# which() drops rows where the condition is NA, as subset() does.
q4.test.female <- with(
  club.df[which(club.df$gender == "F" & club.df$club == "Blechnerei"), ],
  cor.test(x = time, y = drinks)
)
apa(q4.test.female)
## [1] "r = 0.34, t(59) = 2.76, p < 0.01 (2-tailed)"
Question 5
# Recode gender as a number: "M" -> 0, "F" -> 1 (presumably the intended
# coding, judging by the original `"M" == 0, "F" == 1` attempt).
# The original compared the string literals themselves with numbers, which is
# always FALSE, so the column was all FALSE. Its two follow-up lines raised
# errors.
# match() looks up each row's gender; any other value (or NA) becomes NA
# instead of being silently coded as 0. No hard-coded row count is needed.
club.df$gender.as.number <- c(0, 1)[match(club.df$gender, c("M", "F"))]

# Cross-tabulate the original against the recoded column to check the mapping.
table(club.df$gender, club.df$gender.as.number, useNA = "ifany")
# Bar chart of five hard-coded heights, labelled 1 to 5, drawn with white bars.
# `names.arg` is spelled out in full: the original `names =` only worked
# through partial argument matching.
barplot(
  height = c(5, 3, 6, 3, 1),
  names.arg = 1:5,
  col = "white"
)
