Data: Club.df

# Load the club survey data (tab-separated text with a header row).
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, whereas TRUE and FALSE are reserved words.
club.df <- read.table("http://nathanieldphillips.com/wp-content/uploads/2015/12/club.txt",
                      sep = "\t",
                      header = TRUE,
                      stringsAsFactors = FALSE)

Question 1

# Time spent at the club by gender: distribution plot, group means, t-test.
boxplot(time ~ gender,
        data = club.df,
        main = "Distribution of club time",
        xlab = "Gender",
        ylab = "Time (min.)",
        ylim = c(0, 270))

# Mean time per gender
aggregate(time ~ gender, data = club.df, FUN = mean)
##   gender     time
## 1      F 134.4167
## 2      M 136.7292

# Two-sample t-test of time by gender; the result object is kept so that it
# can be summarised again further down in the script.
q1.test <- t.test(time ~ gender, data = club.df)

# Test statistic, degrees of freedom and p-value
q1.test[["statistic"]]
##          t 
## -0.3815224
q1.test[["parameter"]]
##       df 
## 297.5547
q1.test[["p.value"]]
## [1] 0.703088
# There is no significant difference in the amount of time women and men spend at clubs, t(297.55) = -0.38, p = 0.70.


# Same gender comparison, restricted to visitors of the club "Blechnerei"
t.test(time ~ gender,
       data = subset(club.df, club == "Blechnerei"))
## 
##  Welch Two Sample t-test
## 
## data:  time by gender
## t = 0.062752, df = 104.1, p-value = 0.9501
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -20.29240  21.61866
## sample estimates:
## mean in group F mean in group M 
##        140.9180        140.2549
# No, the result did not change: there is still no significant difference in the amount of time women and men spend at the club (Blechnerei), t(104.10) = 0.06, p = 0.95.

Question 2

# Number of drinks by leavealone group: distribution plot, group means, t-test.
boxplot(drinks ~ leavealone,
        data = club.df,
        ylim = c(0, 10),
        ylab = "Number of Drinks",
        xlab = "Leaving alone yes (0) vs no (1)",
        main = "Distribution of Drinks")

# Mean number of drinks per leavealone group.
# The formula is passed positionally: the first argument of aggregate()'s
# formula method is named `x` from R 4.2.0 on and `formula` before that, so
# naming it is not portable across R versions.
aggregate(drinks ~ leavealone,
          data = club.df,
          FUN = mean)
##   leavealone   drinks
## 1          0 3.577465
## 2          1 4.117904

# Two-sample t-test of drinks by leavealone; the result object is kept so that
# it can be summarised again further down in the script.
q2.test <- t.test(drinks ~ leavealone,
                  data = club.df)

q2.test$statistic
##         t 
## -2.625326
q2.test$parameter
##       df 
## 121.1829
q2.test$p.value
## [1] 0.009772036
# There is a significant difference in the number of drinks people had when they went home alone vs. not alone, t(121.18) = -2.63, p < 0.01.

# Repeat the drinks-by-leavealone t-test for women only.
club.df2 <- subset(club.df, gender == "F")

# leavealone is the grouping variable, so it must enter through the formula.
# Passing drinks as x and leavealone as y would instead compare the mean
# number of drinks with the mean of the 0/1 leavealone indicator, which is not
# the hypothesis under test.
q2.etest <- t.test(drinks ~ leavealone,
                   data = club.df2,
                   alternative = "two.sided")
q2.etest$statistic
q2.etest$parameter
q2.etest$p.value
# NOTE: the previously recorded output (t = 22.91244, df = 177.983,
# p = 5.629867e-55) and the conclusion drawn from it came from
# t.test(x = drinks, y = leavealone) and do not answer the question.
# Re-run this chunk and report the statistics of the corrected test before
# stating whether the conclusion also holds for women.

Question 3

# Format the result of a hypothesis test as an APA-style character string.
#
# Arguments:
#   test.object  list with the components read below: statistic (named),
#                parameter, p.value, method and, depending on the method,
#                estimate or observed (as returned by t.test(), cor.test()
#                and chisq.test()).
#   tails        1 or 2; with tails = 1 the stored p-value is halved.
#   sig.digits   number of digits all reported numbers are rounded to.
#   p.lb         lower bound for p; smaller p-values are shown as "p < p.lb".
#
# Returns a single string such as
#   "mean difference = 2.31, t(297.55) = -0.38, p = 0.7 (2-tailed)".
# Methods that are not recognised get no estimate prefix instead of failing.
apa <- function(test.object, tails = 2, sig.digits = 2, p.lb = .01) {

  # First letter of the statistic's name, e.g. "t" or "X" (from "X-squared")
  statistic.id <- substr(names(test.object$statistic), start = 1, stop = 1)
  p.value <- test.object$p.value

  if (tails == 1) {
    p.value <- p.value / 2
  }

  # if/else so that p.value == p.lb is covered too; two separate strict
  # comparisons would leave p.display undefined in that case.
  if (p.value < p.lb) {
    p.display <- paste0("p < ", p.lb, " (", tails, "-tailed)")
  } else {
    p.display <- paste0("p = ", round(p.value, sig.digits),
                        " (", tails, "-tailed)")
  }

  # Defaults for methods without a dedicated branch below
  estimate.display <- ""
  add.par <- ""

  if (grepl("product-moment", test.object$method)) {
    estimate.display <- paste0("r = ",
                               round(test.object$estimate, sig.digits), ", ")
  }

  if (grepl("Chi", test.object$method)) {
    estimate.display <- ""
    # Report the total number of observations next to the degrees of freedom
    add.par <- paste0(", N = ", sum(test.object$observed))
  }

  if (grepl("One Sample t-test", test.object$method)) {
    estimate.display <- paste0("mean = ",
                               round(test.object$estimate, sig.digits), ", ")
  }

  if (grepl("Two Sample t-test", test.object$method)) {
    # Second group mean minus first group mean
    estimate.display <- paste0(
      "mean difference = ",
      round(test.object$estimate[2] - test.object$estimate[1], sig.digits),
      ", "
    )
  }

  paste0(
    estimate.display,
    statistic.id,
    "(",
    round(test.object$parameter, sig.digits),
    add.par,
    ") = ",
    round(test.object$statistic, sig.digits),
    ", ",
    p.display
  )
}
# Print the APA-style summaries of the Question 1 and Question 2 t-tests
print(apa(test.object = q1.test))
## [1] "mean difference = 2.31, t(297.55) = -0.38, p = 0.7 (2-tailed)"
print(apa(test.object = q2.test))
## [1] "mean difference = 0.54, t(121.18) = -2.63, p < 0.01 (2-tailed)"
# Yes the results match with my answers.

Question 4

# Relationship between number of drinks and time spent at the club:
# scatterplot, mean time per number of drinks, correlation test.
plot(x = club.df$drinks,
     y = club.df$time,
     xlab = "Number of drinks",
     ylab = "Time (min.)",
     xlim = c(0, 10),
     ylim = c(0, 270),
     main = "Relationship between Drinks and Time")

# Mean time for each number of drinks.
# The formula is passed positionally: the first argument of aggregate()'s
# formula method is named `x` from R 4.2.0 on and `formula` before that, so
# naming it is not portable across R versions.
aggregate(time ~ drinks,
          data = club.df,
          FUN = mean)
##    drinks      time
## 1       0  85.40000
## 2       1 115.84615
## 3       2  97.03226
## 4       3 129.49123
## 5       4 136.85542
## 6       5 144.95522
## 7       6 155.31034
## 8       7 174.63636
## 9       8 194.00000
## 10      9 258.00000

# Correlation test between time and drinks (cor.test() defaults)
q4.test <- cor.test(x = club.df$time,
                    y = club.df$drinks)
q4.test$estimate
##       cor 
## 0.3617512
q4.test$parameter
##  df 
## 298
q4.test$p.value
## [1] 1.049536e-10
# There is a significant positive correlation between the number of drinks and the time spent at the club, r(298) = 0.36, p < 0.01.


# Keep only women who went to "Blechnerei"; which() drops rows where the
# condition is NA, just as subset() does.
is.fem.blech <- club.df$gender == "F" & club.df$club == "Blechnerei"
femclub.df <- club.df[which(is.fem.blech), ]

# Correlation between time and drinks within that subgroup
cor.test(y = femclub.df$drinks,
         x = femclub.df$time)
## 
##  Pearson's product-moment correlation
## 
## data:  femclub.df$time and femclub.df$drinks
## t = 2.7597, df = 59, p-value = 0.007695
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.09433171 0.54365162
## sample estimates:
##       cor 
## 0.3381205
# Yes, there is still a significant positive correlation between the number of drinks and the time spent at the club for women who went to Blechnerei, r(59) = 0.34, p < 0.01.

Question 5

# Numeric gender indicator: "F" -> 0, "M" -> 1, anything else -> NA.
# Built in one vectorised step instead of two partial assignments into a
# column that does not exist yet.
club.df$gender2.log <- match(club.df$gender, c("F", "M")) - 1

# Proportion of men per club, computed once and reused for the plot.
# The formula is passed positionally: the first argument of aggregate()'s
# formula method is named `x` from R 4.2.0 on and `formula` before that, so
# naming it is not portable across R versions.
agg.result <- aggregate(gender2.log ~ club,
                        data = club.df,
                        FUN = mean)
agg.result
##         club gender2.log
## 1     Barrys   0.3700000
## 2 Blechnerei   0.4553571
## 3    Kantine   0.6363636

# names.arg is spelled out in full rather than relying on partial matching
barplot(height = agg.result$gender2.log,
        names.arg = agg.result$club,
        ylim = c(0, 1),
        col = "royalblue3",
        ylab = "% Males",
        xlab = "Clubs")

# Chi-square test of gender against club
q5.test <- chisq.test(x = club.df$gender,
                      y = club.df$club)

q5.test$statistic
## X-squared 
##   13.7403
q5.test$parameter
## df 
##  2
q5.test$p.value
## [1] 0.001038323
# There is a significant relationship between club and gender, X2(2) = 13.74, p < 0.01.


# Keep only visitors of Barrys and Kantine
club.df3 <- subset(club.df, club == "Barrys" | club == "Kantine")

# Chi-square test of gender against club for these two clubs
chisq.test(y = club.df3$club,
           x = club.df3$gender)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  club.df3$gender and club.df3$club
## X-squared = 12.241, df = 1, p-value = 0.0004674
# Yes, there is also a significant relationship between club and gender when only Barrys and Kantine are included, X2(1) = 12.24, p < 0.01.

Question 6

# Mean of leavealone per gender, computed once and reused for the plot.
# The formula is passed positionally: the first argument of aggregate()'s
# formula method is named `x` from R 4.2.0 on and `formula` before that, so
# naming it is not portable across R versions.
agg.result1 <- aggregate(leavealone ~ gender,
                         data = club.df,
                         FUN = mean)
agg.result1
##   gender leavealone
## 1      F  0.7820513
## 2      M  0.7430556

# names.arg is spelled out in full rather than relying on partial matching
barplot(height = agg.result1$leavealone,
        names.arg = agg.result1$gender,
        ylim = c(0, 1),
        col = "royalblue3",
        ylab = "% People leaving alone",
        xlab = "Gender")

# Chi-square test of gender against leavealone
q6.test <- chisq.test(x = club.df$gender,
                      y = club.df$leavealone)

q6.test$statistic
## X-squared 
## 0.4329259
q6.test$parameter
## df 
##  1
q6.test$p.value
## [1] 0.5105567
# There is no significant relationship between leaving the club alone and gender, X2(1) = 0.43, p = 0.51.


# Keep only people who stayed at the club for more than 60 minutes
club.df60 <- subset(club.df, subset = time > 60)

# Chi-square test of gender against leavealone within that group
chisq.test(y = club.df60$leavealone,
           x = club.df60$gender)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  club.df60$gender and club.df60$leavealone
## X-squared = 0.88492, df = 1, p-value = 0.3469
# Yes, my conclusion that there is no significant relationship between leaving the club alone and gender also holds when only people who stayed more than 60 min. at the club are included, X2(1) = 0.88, p = 0.35.