Load libraries

library(tidyverse)
## ── Attaching packages ────────────
## ✔ ggplot2 3.2.1     ✔ purrr   0.3.2
## ✔ tibble  2.1.3     ✔ dplyr   0.8.3
## ✔ tidyr   1.0.0     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ── Conflicts ─────────────────────
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Default look for the plots below: classic theme with all text at size 16.
theme_set(theme_classic() + theme(text = element_text(size = 16)))

Load and combine data

# Read one year's diary export and keep only the columns analysed below.
#
# path: location of a diary CSV containing a `Caffeine` column and the anxiety
#   rating column that read.csv() exposes as `Anxiety..from.0.to.3.`.
# Returns a data frame with `Anxiety` and `Caffeine` (character), without the
#   rows that have no anxiety rating.
read_diary <- function(path) {
  read.csv(path, stringsAsFactors = FALSE) %>%
    dplyr::select(Anxiety = Anxiety..from.0.to.3., Caffeine) %>%
    # Keep Caffeine as text in every year so the two years stack cleanly
    # (no factor-level mismatch, no type clash if one file parses differently).
    mutate(Caffeine = as.character(Caffeine)) %>%
    filter(!is.na(Anxiety))
}

Diary2018 <- read_diary("./Diary2018.csv")
Diary2019 <- read_diary("./Diary2019.csv")

# Stack the 2018 and 2019 entries so every diary entry stays exactly one row.
# A full_join() on the shared columns would instead pair up rows from the two
# years that happen to have equal (Anxiety, Caffeine) values, collapsing or
# multiplying entries rather than appending them.
# NOTE(review): any knitted output shown further down was produced with the
# earlier join-based merge; re-knit to refresh it.
Diary <- bind_rows(Diary2018, Diary2019)
# Dichotomize caffeine: entries equal to "-" or "" mean no caffeine ("None"),
# any other entry means caffeine was consumed ("Some"). Missing entries stay NA
# because ifelse() propagates NA from the comparison.
Diary <- Diary %>%
  mutate(
    Caffeine = factor(
      ifelse(Caffeine == "-" | Caffeine == "", "0", "1"),
      levels = c(0, 1),
      labels = c("None", "Some")
    )
  )

Analyze: Simple Linear Regression

Test whether I’m more anxious on days I consume caffeine:

# Regress anxiety on the caffeine indicator; "None" is the reference level, so
# the CaffeineSome coefficient is the mean difference on caffeine days.
caffeine_model <- lm(Anxiety ~ Caffeine, data = Diary)
summary(caffeine_model)
## 
## Call:
## lm(formula = Anxiety ~ Caffeine, data = Diary)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.12563 -0.51173 -0.01173  0.37437  1.73827 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.01173    0.02767  36.567   <2e-16 ***
## CaffeineSome  0.11391    0.05087   2.239   0.0255 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5992 on 664 degrees of freedom
## Multiple R-squared:  0.007494,   Adjusted R-squared:  0.005999 
## F-statistic: 5.014 on 1 and 664 DF,  p-value: 0.02548

Conclusion: Yes, I am more anxious on days I consume caffeine (F(1, 664) = 5.01, p = .03, adjusted R2 = 0.006). But the effect size is pretty tiny.

Plot Anxiety by Caffeine Consumption

# Bar chart of mean anxiety per caffeine group with +/- 1 standard-error bars.
# `fun.y` is the argument name in the ggplot2 3.2.x used here; ggplot2 >= 3.3.0
# deprecates it in favour of `fun`.
ggplot(Diary, aes(x = Caffeine, y = Anxiety)) +
  # Bars: group means
  stat_summary(geom = "bar", fun.y = "mean", position = "dodge") +
  # Error bars: mean +/- standard error
  stat_summary(
    geom = "errorbar",
    fun.data = mean_se,
    position = position_dodge(.9),
    width = 0.2
  ) +
  # Show the full 0-3 rating scale on the y axis
  scale_y_continuous(limits = c(0, 3)) +
  labs(x = "Caffeine Consumption", y = "Anxiety (0-3)") +
  theme(text = element_text(size = 20, family = "serif"))