This is my WPA 6

# Load the club data set: a tab-separated text file with a header row.
# Text columns are kept as character vectors instead of being converted to
# factors. TRUE/FALSE are spelled out because T and F are ordinary variables
# that can be reassigned.
club.df <- read.table(
  "http://nathanieldphillips.com/wp-content/uploads/2015/12/club.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)

Question 1: Women = Werewolves?

#Create a plot (e.g. boxplot or beanplot) showing the distribution of club times for males and females
# One box per gender, showing the spread of time spent at the club.
boxplot(
  time ~ gender,
  data = club.df,
  main = "distribution of gender and time",
  xlab = "gender",
  ylab = "time"
)

#Using grouped aggregation (e.g.; aggregate or dplyr), calculate the mean number of minutes that men and women stayed at the club(s)
# Mean club time per gender.
aggregate(time ~ gender, data = club.df, FUN = mean)
##   gender     time
## 1      F 134.4167
## 2      M 136.7292
#Conduct a two-tailed t-test testing whether or not there is a significant difference in the amount of time women and men spend at clubs. Save the result as an object called q1.test
# Two-sided Welch t-test comparing club time between the "F" and "M" groups,
# using every club in the data set.
q1.test <- t.test(
  time ~ gender,
  data = club.df,
  subset = gender %in% c("F", "M"),
  alternative = "two.sided"
)

# Print the test summary.
q1.test
## 
##  Welch Two Sample t-test
## 
## data:  time by gender
## t = -0.38152, df = 297.55, p-value = 0.7031
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -14.240836   9.615836
## sample estimates:
## mean in group F mean in group M 
##        134.4167        136.7292
#Write your conclusion in APA format. Be sure to address my friend’s claim that women are werewolves.
#t-test: t(297.55) = -0.38152, p = 0.7031.


#Do the results change if you only look at people who were at the Blechnerei? Using only the Blechnerei data, repeat the test and write your conclusion in APA format (Hint: Use subset()!)
# Same two-sided gender comparison of club time, restricted to rows whose
# club is "Blechnerei". This overwrites the earlier q1.test object.
q1.test <- t.test(
  time ~ gender,
  data = club.df,
  subset = gender %in% c("F", "M") & club == "Blechnerei",
  alternative = "two.sided"
)

# Print the test summary.
q1.test
## 
##  Welch Two Sample t-test
## 
## data:  time by gender
## t = 0.062752, df = 104.1, p-value = 0.9501
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -20.29240  21.61866
## sample estimates:
## mean in group F mean in group M 
##        140.9180        140.2549
#t-test: t(104.1) = 0.062752, p = 0.9501.

Question 2

#Create a plot (e.g. boxplot or beanplot) showing the distribution of drinks for people that did and did not leave alone
# One box per leavealone value, showing the spread of the number of drinks.
# The y-axis label previously read "acnout of drinks" (typo).
boxplot(
  drinks ~ leavealone,
  data = club.df,
  ylab = "amount of drinks",
  xlab = "leave alone",
  main = "distribution of drinks and (not) leave alone"
)

#Using grouped aggregation (e.g.; aggregate or dplyr), calculate the mean number of drinks for people that did and did not leave alone
# Mean number of drinks per leavealone value.
aggregate(drinks ~ leavealone, data = club.df, FUN = mean)
##   leavealone   drinks
## 1          0 3.577465
## 2          1 4.117904
#Conduct a two-tailed t-test testing whether or not there is a significant difference in the amount of drinks people had when they went home alone versus not alone. Save the result as an object called q2.test
# Two-sided Welch t-test comparing the number of drinks between the
# leavealone == 0 and leavealone == 1 groups.
q2.test <- t.test(
  drinks ~ leavealone,
  data = club.df,
  subset = leavealone %in% c(0, 1),
  alternative = "two.sided"
)

# Print the test summary.
q2.test
## 
##  Welch Two Sample t-test
## 
## data:  drinks by leavealone
## t = -2.6253, df = 121.18, p-value = 0.009772
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.9479793 -0.1328990
## sample estimates:
## mean in group 0 mean in group 1 
##        3.577465        4.117904
#Write your conclusion in APA format.
#t-test: t(121.18) = -2.6253, p = 0.009772

#Do the results change if you ignore Males and only test Females? Using only the Female data, repeat the test and write your conclusion in APA format (Hint: Use subset()!)
# Same drinks-by-leavealone comparison, restricted to rows with gender "F".
# This overwrites the earlier q2.test object.
q2.test <- t.test(
  drinks ~ leavealone,
  data = club.df,
  subset = leavealone %in% c(0, 1) & gender == "F",
  alternative = "two.sided"
)
# Print the test summary.
q2.test
## 
##  Welch Two Sample t-test
## 
## data:  drinks by leavealone
## t = -1.3791, df = 53.466, p-value = 0.1736
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.9844944  0.1821801
## sample estimates:
## mean in group 0 mean in group 1 
##        3.352941        3.754098
# Conclusion in APA:
#t-test: t(53.466) = -1.3791, p = 0.1736

Question 3

#APA style conclusion as an output
#' Format a hypothesis-test result as an APA-style string
#'
#' Reads the `statistic`, `parameter`, `p.value`, `method` and `estimate`
#' components of a test result (plus `observed` for chi-square tests) and
#' pastes them into one line such as
#' "mean = 3, t(10) = 2.5, p = 0.01 (2-tailed)".
#'
#' @param test.object A list-like test result exposing the components above.
#' @param tails 1 or 2. When 1, the stored p-value is halved before display
#'   (this assumes the stored p-value is two-tailed).
#' @param sig.digits Number of decimal places used when rounding the
#'   estimate, the degrees of freedom, the statistic and the p-value.
#' @param p.lb Display threshold: p-values below it are shown as "p < p.lb"
#'   instead of their rounded value.
#' @return A single character string.
apa <- function(test.object, tails = 2, sig.digits = 2, p.lb = .01) {

  # First letter of the statistic's name, e.g. "t" or "X".
  statistic.id <- substr(names(test.object$statistic), start = 1, stop = 1)
  p.value <- test.object$p.value

  if (tails == 1) {
    p.value <- p.value / 2
  }

  # if/else so that p.value == p.lb is covered too; two strict comparisons
  # left p.display undefined in that case.
  if (p.value < p.lb) {
    p.display <- paste0("p < ", p.lb, " (", tails, "-tailed)")
  } else {
    p.display <- paste0(
      "p = ", round(p.value, sig.digits), " (", tails, "-tailed)"
    )
  }

  # Defaults keep the function usable for test types not listed below:
  # the estimate part is simply left out instead of raising an error.
  estimate.display <- ""
  add.par <- ""

  if (grepl("product-moment", test.object$method)) {

    estimate.display <- paste0(
      "r = ", round(test.object$estimate, sig.digits), ", "
    )

  } else if (grepl("Chi", test.object$method)) {

    # Chi-square results report the total sample size next to the df.
    add.par <- paste0(", N = ", sum(test.object$observed))

  } else if (grepl("One Sample t-test", test.object$method)) {

    estimate.display <- paste0(
      "mean = ", round(test.object$estimate, sig.digits), ", "
    )

  } else if (grepl("Two Sample t-test", test.object$method)) {

    # Second estimate minus first estimate.
    # NOTE(review): for t.test() results this sign is the opposite of the
    # t statistic's sign; kept as-is so existing output does not change.
    estimate.display <- paste0(
      "mean difference = ",
      round(test.object$estimate[2] - test.object$estimate[1], sig.digits),
      ", "
    )

  }

  paste0(
    estimate.display,
    statistic.id,
    "(",
    round(test.object$parameter, sig.digits),
    add.par,
    ") = ",
    round(test.object$statistic, sig.digits),
    ", ",
    p.display
  )

}
# APA summary of the most recent q1.test object.
apa(test.object = q1.test)
## [1] "mean difference = -0.66, t(104.1) = 0.06, p = 0.95 (2-tailed)"
# APA summary of the most recent q2.test object.
apa(test.object = q2.test)
## [1] "mean difference = 0.4, t(53.47) = -1.38, p = 0.17 (2-tailed)"

Question 4

#Create a plot (e.g. scatterplot) showing the relationship between drinks and time

# Scatterplot with drinks on the x-axis and time on the y-axis.
with(
  club.df,
  plot(
    x = drinks,
    y = time,
    main = "drinks and time",
    xlab = "Drinks",
    ylab = "time"
  )
)

#Using grouped aggregation (e.g.; aggregate or dplyr), calculate the mean number of minutes that people stay at the club for each drink amount.
# Mean club time for each distinct number of drinks.
aggregate(time ~ drinks, data = club.df, FUN = mean)
##    drinks      time
## 1       0  85.40000
## 2       1 115.84615
## 3       2  97.03226
## 4       3 129.49123
## 5       4 136.85542
## 6       5 144.95522
## 7       6 155.31034
## 8       7 174.63636
## 9       8 194.00000
## 10      9 258.00000
#Is the difference significant? Conduct a correlation test and save the result as an object called q4.test
# Correlation test between drinks and time across all rows of club.df.
q4.test <- cor.test(club.df$drinks, club.df$time)

# Print the test summary.
q4.test
## 
##  Pearson's product-moment correlation
## 
## data:  club.df$drinks and club.df$time
## t = 6.6984, df = 298, p-value = 1.05e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2591255 0.4562998
## sample estimates:
##       cor 
## 0.3617512

Write your result in APA format. r(298) = 0.3617512, p = 1.05e-10

#Repeat the test but only for females at Blechnerei. Do you get the same conclusion? Write the results of this test in APA format
# Correlation between drinks and time for females at Blechnerei only.
# The formula interface is used because cor.test() honours `subset` only
# there; with plain x/y vectors the argument is silently ignored and the
# test runs on the full sample. This overwrites the earlier q4.test object.
# NOTE(review): the printed output recorded after this call was produced by
# the unfiltered version and needs to be regenerated.
q4.test <- cor.test(
  ~ drinks + time,
  data = club.df,
  subset = gender == "F" & club == "Blechnerei"
)

# Print the test summary.
q4.test
## 
##  Pearson's product-moment correlation
## 
## data:  club.df$drinks and club.df$time
## t = 6.6984, df = 298, p-value = 1.05e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2591255 0.4562998
## sample estimates:
##       cor 
## 0.3617512
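#Conclusion in APA format
#r(298) = 0.3617512, p = 1.05e-10
#NOTE: these values are identical to the full-sample test above, so they do not reflect the females-at-Blechnerei subset; re-run the subsetted test and update them.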