# Load the tab-separated club data set, keeping text columns as character.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables rather than reserved words.
club.df <- read.table(
  "http://nathanieldphillips.com/wp-content/uploads/2015/12/club.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)

Q1 A

# Boxplots of time, one box per gender group
boxplot(
  time ~ gender,
  data = club.df,
  xlab = "gender",
  ylab = "club time"
)

Q1 B

# Mean time for each gender group
aggregate(time ~ gender, data = club.df, FUN = mean)
##   gender     time
## 1      F 134.4167
## 2      M 136.7292

Q1 C

# Pull out the time values for each gender, then compare the two groups
# with a two-sided t-test.
time.women <- club.df$time[club.df$gender %in% "F"]
time.men <- club.df$time[club.df$gender %in% "M"]
test.result <- t.test(
  x = time.women,
  y = time.men,
  alternative = "two.sided"
)
test.result
## 
##  Welch Two Sample t-test
## 
## data:  time.women and time.men
## t = -0.38152, df = 297.55, p-value = 0.7031
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -14.240836   9.615836
## sample estimates:
## mean of x mean of y 
##  134.4167  136.7292

or

# Same comparison written with the formula interface: time by gender,
# restricted to rows whose gender is "F" or "M".
q1.test <- t.test(
  time ~ gender,
  data = club.df,
  subset = gender %in% c("F", "M"),
  alternative = "two.sided"
)
q1.test
## 
##  Welch Two Sample t-test
## 
## data:  time by gender
## t = -0.38152, df = 297.55, p-value = 0.7031
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -14.240836   9.615836
## sample estimates:
## mean in group F mean in group M 
##        134.4167        136.7292

Q1 D t-test: t(297.55) = -0.38, p = 0.70 # there is no significant difference in the amount of time women and men spend at clubs

Q1 E

# Time by gender again, this time only for rows where club is "Blechnerei"
test.result <- t.test(
  time ~ gender,
  data = club.df,
  subset = gender %in% c("F", "M") & club == "Blechnerei",
  alternative = "two.sided"
)
test.result
## 
##  Welch Two Sample t-test
## 
## data:  time by gender
## t = 0.062752, df = 104.1, p-value = 0.9501
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -20.29240  21.61866
## sample estimates:
## mean in group F mean in group M 
##        140.9180        140.2549

t-test: t(104.10) = 0.06, p = 0.95 # there is no significant difference in the amount of time women and men spend at Blechnerei

Q2 A

# Boxplots of drinks, one box per leavealone value
boxplot(
  drinks ~ leavealone,
  data = club.df,
  xlab = "leavealone",
  ylab = "drinks"
)

Q2 B

# Mean drinks for each leavealone value
aggregate(drinks ~ leavealone, data = club.df, FUN = mean)
##   leavealone   drinks
## 1          0 3.577465
## 2          1 4.117904

Q2 C

# Two-sided t-test of drinks by leavealone, restricted to rows whose
# leavealone value is 0 or 1.
q2.test <- t.test(
  drinks ~ leavealone,
  data = club.df,
  subset = leavealone %in% c("0", "1"),
  alternative = "two.sided"
)
q2.test
## 
##  Welch Two Sample t-test
## 
## data:  drinks by leavealone
## t = -2.6253, df = 121.18, p-value = 0.009772
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.9479793 -0.1328990
## sample estimates:
## mean in group 0 mean in group 1 
##        3.577465        4.117904

Q2 D t-test: t(121.18) = -2.63, p < 0.01 # there is a significant difference in the number of drinks people had when they went home alone versus not alone.

Q2 E

# Drinks by leavealone again, this time only for rows where gender is "F".
# This assignment replaces the earlier value of q2.test.
q2.test <- t.test(
  drinks ~ leavealone,
  data = club.df,
  subset = leavealone %in% c("0", "1") & gender == "F",
  alternative = "two.sided"
)
q2.test
## 
##  Welch Two Sample t-test
## 
## data:  drinks by leavealone
## t = -1.3791, df = 53.466, p-value = 0.1736
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.9844944  0.1821801
## sample estimates:
## mean in group 0 mean in group 1 
##        3.352941        3.754098

t-test: t(53.47) = -1.38, p = 0.17 # there is no significant difference in the number of drinks women had when they went home alone versus not alone.

Q3 A

#' Summarise a hypothesis-test object as an APA-style string
#'
#' Builds a string of the form
#' "<estimate>, <stat>(<parameter>) = <value>, p = <p> (<tails>-tailed)".
#'
#' @param test.object A list with the elements `statistic` (named),
#'   `parameter`, `p.value` and `method`. `estimate` is read for correlation
#'   and one/two-sample t-tests, and `observed` for chi-square tests.
#' @param tails Number of tails to report; when 1, the p-value is halved.
#' @param sig.digits Number of decimal places passed to `round()`.
#' @param p.lb P-values below this bound are shown as "p < p.lb".
#' @return A single character string.
#'
#' @details For two-sample t-tests the reported mean difference is
#'   `estimate[2] - estimate[1]`, so its sign can be the opposite of the
#'   sign of the t statistic. Methods that match none of the recognised
#'   patterns are reported without an estimate.
apa <- function(test.object, tails = 2, sig.digits = 2, p.lb = .01) {

  # First letter of the statistic's name (e.g. "t" or "X")
  statistic.id <- substr(names(test.object$statistic), start = 1, stop = 1)
  p.value <- test.object$p.value

  if (tails == 1) {
    p.value <- p.value / 2
  }

  # if/else so that p.value == p.lb is also covered; two strict comparisons
  # would leave p.display undefined in that case.
  if (p.value < p.lb) {
    p.display <- paste0("p < ", p.lb, " (", tails, "-tailed)")
  } else {
    p.display <- paste0(
      "p = ", round(p.value, sig.digits), " (", tails, "-tailed)"
    )
  }

  # Defaults, so unrecognised methods still yield a string instead of an
  # "object not found" error.
  estimate.display <- ""
  add.par <- ""

  if (grepl("product-moment", test.object$method)) {
    estimate.display <- paste0(
      "r = ", round(test.object$estimate, sig.digits), ", "
    )
  }

  if (grepl("Chi", test.object$method)) {
    estimate.display <- ""
    # Sample size is appended inside the parentheses after the parameter
    add.par <- paste0(", N = ", sum(test.object$observed))
  }

  if (grepl("One Sample t-test", test.object$method)) {
    estimate.display <- paste0(
      "mean = ", round(test.object$estimate, sig.digits), ", "
    )
  }

  if (grepl("Two Sample t-test", test.object$method)) {
    mean.diff <- test.object$estimate[2] - test.object$estimate[1]
    estimate.display <- paste0(
      "mean difference = ", round(mean.diff, sig.digits), ", "
    )
  }

  paste0(
    estimate.display,
    statistic.id,
    "(",
    round(test.object$parameter, sig.digits),
    add.par,
    ") = ",
    round(test.object$statistic, sig.digits),
    ", ",
    p.display
  )
}

Q3 B

# APA summary of the Q1 time-by-gender test
apa(q1.test)
## [1] "mean difference = 2.31, t(297.55) = -0.38, p = 0.7 (2-tailed)"
# NOTE(review): q2.test was reassigned in Q2 E (gender == "F" subset), so
# this call summarises the women-only test rather than the Q2 C test.
apa(q2.test)
## [1] "mean difference = 0.4, t(53.47) = -1.38, p = 0.17 (2-tailed)"

Q4 A

# Scatter plot with drinks on the x-axis and time on the y-axis
with(
  club.df,
  plot(
    x = drinks,
    y = time,
    main = "the relationship between drinks and time",
    xlab = "drinks",
    ylab = "time"
  )
)

Q4 B

# Mean drinks for each distinct value of time
aggregate(drinks ~ time, data = club.df, FUN = mean)
##     time   drinks
## 1     -7 0.000000
## 2      3 5.000000
## 3      6 2.000000
## 4     12 4.000000
## 5     17 2.000000
## 6     23 4.000000
## 7     33 2.000000
## 8     35 4.000000
## 9     39 4.000000
## 10    40 3.500000
## 11    43 4.500000
## 12    44 3.000000
## 13    45 4.000000
## 14    50 2.000000
## 15    51 1.000000
## 16    52 2.500000
## 17    53 1.000000
## 18    56 1.500000
## 19    59 3.000000
## 20    63 3.500000
## 21    64 3.000000
## 22    65 5.000000
## 23    66 5.000000
## 24    67 4.000000
## 25    68 4.000000
## 26    69 2.000000
## 27    71 2.000000
## 28    72 3.400000
## 29    74 2.000000
## 30    77 3.000000
## 31    79 1.000000
## 32    80 5.000000
## 33    83 3.333333
## 34    85 3.666667
## 35    86 0.000000
## 36    87 2.000000
## 37    88 4.000000
## 38    89 3.666667
## 39    90 3.666667
## 40    91 0.500000
## 41    92 3.000000
## 42    93 5.000000
## 43    94 4.000000
## 44    97 5.000000
## 45   100 2.500000
## 46   101 2.500000
## 47   102 4.333333
## 48   103 2.000000
## 49   104 5.000000
## 50   105 4.000000
## 51   106 4.500000
## 52   108 3.000000
## 53   109 4.000000
## 54   110 5.000000
## 55   111 4.000000
## 56   112 4.000000
## 57   113 3.833333
## 58   114 4.666667
## 59   116 4.500000
## 60   117 3.000000
## 61   118 2.500000
## 62   119 5.500000
## 63   120 3.333333
## 64   121 2.000000
## 65   122 4.333333
## 66   123 4.000000
## 67   124 3.666667
## 68   125 4.250000
## 69   127 5.000000
## 70   129 5.000000
## 71   130 3.333333
## 72   131 4.500000
## 73   132 4.250000
## 74   133 6.000000
## 75   134 4.500000
## 76   135 4.000000
## 77   136 3.500000
## 78   137 5.000000
## 79   138 3.000000
## 80   141 5.333333
## 81   143 3.500000
## 82   145 5.000000
## 83   146 4.000000
## 84   147 4.000000
## 85   148 3.250000
## 86   149 6.000000
## 87   150 5.000000
## 88   151 3.600000
## 89   152 4.500000
## 90   153 4.000000
## 91   154 5.000000
## 92   155 5.000000
## 93   156 3.000000
## 94   157 4.000000
## 95   158 3.800000
## 96   159 6.000000
## 97   160 5.000000
## 98   161 3.666667
## 99   162 4.000000
## 100  164 5.000000
## 101  166 2.500000
## 102  167 4.250000
## 103  168 4.000000
## 104  169 2.000000
## 105  171 5.000000
## 106  172 1.500000
## 107  173 4.500000
## 108  174 4.000000
## 109  175 4.000000
## 110  176 5.000000
## 111  177 5.000000
## 112  178 3.500000
## 113  179 3.333333
## 114  180 6.000000
## 115  181 4.333333
## 116  183 3.500000
## 117  185 3.500000
## 118  186 4.000000
## 119  188 0.000000
## 120  189 5.000000
## 121  190 5.000000
## 122  191 5.000000
## 123  192 5.000000
## 124  193 5.000000
## 125  194 4.000000
## 126  195 4.000000
## 127  197 3.000000
## 128  198 4.000000
## 129  199 5.000000
## 130  201 7.000000
## 131  202 5.000000
## 132  204 8.000000
## 133  208 5.000000
## 134  212 6.000000
## 135  214 5.000000
## 136  215 7.000000
## 137  216 4.000000
## 138  217 6.000000
## 139  218 6.000000
## 140  219 4.000000
## 141  220 5.000000
## 142  221 5.000000
## 143  222 4.000000
## 144  226 5.000000
## 145  228 3.000000
## 146  232 7.000000
## 147  240 5.000000
## 148  245 8.000000
## 149  246 4.000000
## 150  249 5.500000
## 151  252 4.000000
## 152  256 4.000000
## 153  258 9.000000
## 154  268 3.000000

Q4 C

# Pearson correlation test between drinks and time (defaults made explicit)
q4.test <- cor.test(
  x = club.df$drinks,
  y = club.df$time,
  alternative = "two.sided",
  method = "pearson"
)
q4.test
## 
##  Pearson's product-moment correlation
## 
## data:  club.df$drinks and club.df$time
## t = 6.6984, df = 298, p-value = 1.05e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2591255 0.4562998
## sample estimates:
##       cor 
## 0.3617512

Q4 D correlation test: r = 0.36, t(298) = 6.70, p < 0.01 # there is a significant, moderate positive correlation between the number of drinks people had and the time they spent at the club

Q4 E

```