0) Intro Notes:

- This HTML file illustrates the initial data cleaning, primarily showing output. Several large code chunks have been hidden from the HTML file to improve readability.

1) Packages & R Markdown Setup

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(naniar)
library(gtsummary)
knitr::opts_chunk$set(include = TRUE, echo = TRUE)

2) Data Read-In

# Location of the raw survey export. Defined once so the header read and the
# data read below can never point at different files.
raw_data_path <- "/Users/noahwolkowicz/Desktop/CT/West Haven/Postdoc/Postdoc Research/Jenn & Noah Collab/Data/JN_Data_6.1.22.csv"

# Read only the first row of the file (the variable names) as plain data so it
# can be applied as column names after the data rows are loaded.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable variables.
headers <- read.csv(raw_data_path, skip = 0, header = FALSE, nrows = 1, as.is = TRUE)

# Skip the first two rows of the file; readr then treats the next row (the
# ImportId metadata row, per the column specification printed below) as
# provisional column names and everything after it as data.
df <- read_csv(raw_data_path, skip = 2)
## Rows: 892 Columns: 614
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (538): {"ImportId":"status"}, {"ImportId":"ipAddress"}, {"ImportId":"_r...
## dbl   (67): {"ImportId":"progress"}, {"ImportId":"duration"}, {"ImportId":"l...
## lgl    (6): {"ImportId":"finished"}, {"ImportId":"recipientLastName"}, {"Imp...
## dttm   (3): {"ImportId":"startDate","timeZone":"America/Denver"}, {"ImportId...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Replace the provisional ImportId column names with the variable names read
# from the first row of the CSV.
# NOTE(review): `headers` is a one-row data frame rather than a character
# vector; this relies on it being coerced to names -- confirm it has one entry
# per column of `df`.
colnames(df) <- headers

Dimensions of Initial Data Set

dim(df)
## [1] 892 616
#892 people with 616 variables

3) Inattentive Responders

df %>% janitor::tabyl(Data_Use)
##                                                 Data_Use   n    percent
##  Do not use my data. I did not devote my full attention.  87 0.09753363
##                Use my data. I devoted my full attention. 708 0.79372197
##                                                     <NA>  97 0.10874439
##  valid_percent
##       0.109434
##       0.890566
##             NA
df %>% janitor::tabyl(Failed_ATTN_Checks.f)
##  Failed_ATTN_Checks.f   n    percent valid_percent
##                Failed 168 0.18834081     0.1992883
##                Passed 675 0.75672646     0.8007117
##                  <NA>  49 0.05493274            NA
table(df$Data_Use, df$Failed_ATTN_Checks.f)
##                                                          
##                                                           Failed Passed
##   Do not use my data. I did not devote my full attention.     47     40
##   Use my data. I devoted my full attention.                  110    598
#Statistical assessment of significant differences in attentive responding across conditions
chisq.test(df$Failed_ATTN_Checks, df$Condition)
## 
##  Pearson's Chi-squared test
## 
## data:  df$Failed_ATTN_Checks and df$Condition
## X-squared = 1.4779, df = 2, p-value = 0.4776
chisq.test(df$Data_Use, df$Condition)
## 
##  Pearson's Chi-squared test
## 
## data:  df$Data_Use and df$Condition
## X-squared = 0.86774, df = 2, p-value = 0.648
# Retain only respondents who passed the attention checks AND asked for their
# data to be used. Rows where either variable is NA are dropped too, because
# filter() keeps only rows whose condition is TRUE.
df <- df %>%
  filter(
    Failed_ATTN_Checks.f == "Passed",
    Data_Use == "Use my data. I devoted my full attention."
  )

4) People Who are Missing (have “NA” for) Condition Variable

#### Checking if anyone is missing condition assignment ####
dim(df[is.na(df$Condition),]) #1 person missing/not assigned to a condition
## [1]   1 624
# Keep ALL columns for respondents without a condition assignment.
# The earlier subset used `[, 1]`, which retained only the first column; the
# three writing variables tabulated below were therefore absent from the subset
# and table() returned "table of extent 0 x 0 x 0" no matter what the data held.
missing_condition <- df[is.na(df$Condition), ]

# Cross-tabulate the three *_Writing variables for these respondents.
# useNA = "ifany" keeps blank (NA) responses visible instead of dropping them,
# so a respondent who wrote nothing still shows up in the table.
table(
  missing_condition$PMI_Writing,
  missing_condition$`Neutral Writing`,
  missing_condition$NMI_Writing,
  useNA = "ifany"
)
## < table of extent 0 x 0 x 0 >
#^Code above verifies that my initial coding to create a condition variable didn't exclude anyone
missing_condition <- df %>% filter(is.na(Condition)) #Make separate df to look at this person

mean(is.na(missing_condition)) #They're missing 25% of their data
## [1] 0.2532051
miss_cond_vars <- missing_condition %>% naniar::miss_var_summary() %>% select(pct_miss)
hist(miss_cond_vars$pct_miss) #And the variables they're missing are missing 100% of the items

dim(df)
## [1] 598 624
#^Code above confirms everyone in dataset now was assigned to/completed a mood induction condition

So what’s going on with this one person who’s missing their condition variable

but appears to have completed the study?

#Hard to know for sure, but scrolling through this person's actual data file, it
#appears that they started the study and went through almost everything up to the
#mood induction. They were assigned neutral but didn't type anything and subsequently went on to 
#complete the rest of the measures. Because they didn't do any of the condition writing,
#I'm not sure we could argue they would be from the same post-induction "population"
#as folks who were exposed to the condition. Opting to remove them.

# Drop the respondent(s) with no condition assignment, then confirm that no
# missing Condition values remain (the count printed should be 0).
df <- filter(df, !is.na(Condition))
sum(is.na(df[["Condition"]]))
## [1] 0
dim(df)
## [1] 597 624

5) Demographics

Demographics Missingness & Table

#### Missingness in Demographic Data ####
# Keep only respondents who are missing at least one demographic value.
# `filter(anyNA(.))` evaluated anyNA() on the entire data frame, producing a
# single TRUE/FALSE that was recycled across rows -- so it kept every row (or
# none) rather than the rows that actually contain an NA.
Missing_Demo_df <- Demo_df %>%
  filter(if_any(everything(), is.na)) %>%
  arrange(ID)

# Plot the missingness pattern for just those respondents.
vis_miss(Missing_Demo_df)
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

Demo_Total_Table
## Warning: The `fmt_missing()` function is deprecated and will soon be removed
## * Use the `sub_missing()` function instead
Characteristic Full Sample By Condition
N = 5971 Negative, N = 2011 Neutral, N = 2011 Positive, N = 1951
Age M(SD)=19.33(1.93) M(SD)=19.50(2.12) M(SD)=19.27(1.67) M(SD)=19.21(1.96)
Sex-at-Birth
Female 344 (58%) 109 (54%) 134 (67%) 101 (52%)
Male 253 (42%) 92 (46%) 67 (33%) 94 (48%)
Gender
Female 343 (57%) 107 (53%) 135 (67%) 101 (52%)
Male 250 (42%) 91 (45%) 66 (33%) 93 (48%)
Non-binary 4 (0.7%) 3 (1.5%) 0 (0%) 1 (0.5%)
Sexual Orientation
Asexual 7 (1.2%) 4 (2.0%) 1 (0.5%) 2 (1.0%)
Bisexual 34 (5.7%) 7 (3.5%) 11 (5.5%) 16 (8.2%)
Heterosexual 543 (91%) 185 (92%) 185 (92%) 173 (89%)
Homosexual 13 (2.2%) 5 (2.5%) 4 (2.0%) 4 (2.1%)
Race/Ethnicity
American Indian or Alaska Native 6 (1.0%) 3 (1.5%) 3 (1.5%) 0 (0%)
Asian 12 (2.0%) 3 (1.5%) 6 (3.0%) 3 (1.6%)
Black or African American 32 (5.4%) 11 (5.5%) 10 (5.0%) 11 (5.7%)
Hispanic or Latino 47 (7.9%) 17 (8.5%) 16 (8.0%) 14 (7.3%)
Middle Eastern 3 (0.5%) 0 (0%) 1 (0.5%) 2 (1.0%)
Multiracial 16 (2.7%) 6 (3.0%) 5 (2.5%) 5 (2.6%)
White (non-Hispanic) 479 (81%) 161 (80%) 160 (80%) 158 (82%)
Student Status
No 1 (0.2%) 0 (0%) 0 (0%) 1 (0.5%)
Yes 596 (100%) 201 (100%) 201 (100%) 194 (99%)
Student Year
Freshman 366 (61%) 113 (56%) 119 (59%) 134 (69%)
Junior 52 (8.7%) 18 (9.0%) 22 (11%) 12 (6.2%)
Senior 41 (6.9%) 19 (9.5%) 13 (6.5%) 9 (4.6%)
Sophomore 138 (23%) 51 (25%) 47 (23%) 40 (21%)
1 M(SD)=Mean(SD); n (%)

Demographic Statistics

Demo_Chi_df %>% left_join(Demo_Chi_Stat, by = "Variable") %>% left_join(Demo_Chi_p, by = "Variable") %>% arrange(p_value)
## # A tibble: 9 × 4
##   Variable                df `Chi_Square/F_Value` p_value
##   <chr>                <int>                <dbl>   <dbl>
## 1 SAB.f                    2                10.4  0.00554
## 2 Gender.f                 4                14.5  0.00584
## 3 Student_Year.f           6                 9.83 0.132  
## 4 Marital_Status.f         6                 8.03 0.236  
## 5 Employment.f             6                 6.67 0.353  
## 6 Student_Status.f         2                 2.06 0.356  
## 7 Sexual_Orientation.f     6                 6.16 0.406  
## 8 Native_Language.f        8                 8.26 0.408  
## 9 Race_Ethnicity.f        12                 6.85 0.868
# One-way ANOVA testing whether mean Age differs between conditions.
summary(aov(formula = Age ~ Condition, data = Demo_df))
##              Df Sum Sq Mean Sq F value Pr(>F)
## Condition     2    9.6   4.782   1.288  0.276
## Residuals   593 2201.3   3.712               
## 1 observation deleted due to missingness

6) Substance Use

Substance Use Missingness & Table

Drug_df %>% vis_miss()

Drug_Total_Table
## Warning: The `fmt_missing()` function is deprecated and will soon be removed
## * Use the `sub_missing()` function instead
Characteristic Full Sample By Condition
N = 5971 Negative, N = 2011 Neutral, N = 2011 Positive, N = 1951
Drinking Frequency
Never 169 (28%) 62 (31%) 52 (26%) 55 (28%)
Monthly or less 171 (29%) 65 (32%) 58 (29%) 48 (25%)
2-4x/month 129 (22%) 33 (16%) 48 (24%) 48 (25%)
2-3x/week 106 (18%) 32 (16%) 38 (19%) 36 (18%)
4+ x/week 22 (3.7%) 9 (4.5%) 5 (2.5%) 8 (4.1%)
Drinking Quantity
1-2 242 (46%) 86 (49%) 81 (47%) 75 (43%)
3-4 151 (29%) 46 (26%) 53 (30%) 52 (30%)
5-6 81 (15%) 28 (16%) 29 (17%) 24 (14%)
7-9 40 (7.6%) 12 (6.9%) 8 (4.6%) 20 (11%)
10+ 10 (1.9%) 3 (1.7%) 3 (1.7%) 4 (2.3%)
Binge Drinking Frequency
Never 323 (54%) 110 (55%) 115 (57%) 98 (50%)
< Monthly 141 (24%) 49 (24%) 46 (23%) 46 (24%)
Monthly 78 (13%) 24 (12%) 27 (13%) 27 (14%)
Weekly 54 (9.0%) 17 (8.5%) 13 (6.5%) 24 (12%)
Daily or ~Daily 1 (0.2%) 1 (0.5%) 0 (0%) 0 (0%)
AUDIT Total M(SD)=4.8(5.2) M(SD)=4.5(5.0) M(SD)=4.5(5.0) M(SD)=5.3(5.6)
DUDIT_Total M(SD)=1.9(4.3) M(SD)=1.6(4.0) M(SD)=2.2(5.3) M(SD)=1.7(3.4)
AUD Criteria Endorsed M(SD)=1.53(2.05) M(SD)=1.39(1.97) M(SD)=1.65(2.20) M(SD)=1.56(1.99)
SUD Criteria Endorsed M(SD)=0.97(2.07) M(SD)=0.70(1.47) M(SD)=1.10(2.36) M(SD)=1.10(2.25)
AUD Diagnostic Status
Mild 128 (21%) 29 (14%) 42 (21%) 57 (29%)
Moderate 67 (11%) 25 (12%) 25 (12%) 17 (8.7%)
None 368 (62%) 137 (68%) 121 (60%) 110 (56%)
Severe 34 (5.7%) 10 (5.0%) 13 (6.5%) 11 (5.6%)
SUD Diagnostic Status
Mild 64 (11%) 17 (8.5%) 19 (9.5%) 28 (14%)
Moderate 28 (4.7%) 12 (6.0%) 10 (5.0%) 6 (3.1%)
None 477 (80%) 169 (84%) 159 (79%) 149 (76%)
Severe 28 (4.7%) 3 (1.5%) 13 (6.5%) 12 (6.2%)
1 n (%); M(SD)=Mean(SD)
Drug_Chi_df %>% left_join(Drug_Chi_Stat, by = "Variable") %>% left_join(Drug_Chi_p, by = "Variable") %>% arrange(p_value)
## # A tibble: 7 × 4
##   Variable              df `Chi_Square/F_Value` p_value
##   <chr>              <int>                <dbl>   <dbl>
## 1 MINI_AUD_Dx            6                14.5   0.0246
## 2 MINI_SUD_Dx            6                12.9   0.0452
## 3 Favorite_Caff.f        8                13.6   0.0919
## 4 AUDIT1.f               8                 8.55  0.382 
## 5 AUDIT2.f               8                 7.64  0.470 
## 6 AUDIT3.f               8                 7.13  0.523 
## 7 Favorite_Alcohol.f     6                 2.22  0.898
rbind(AUDITSum_aov, DUDITSum_aov, MINIAUDSum_aov, MINISUDSum_aov) %>% arrange(p_value)
##       Variable  F_value df_n df_d    p_value
## 1 MINI_SUD_Sum 2.595737    2  594 0.07543683
## 2    AUDIT_Sum 1.328669    2  594 0.26561531
## 3    DUDIT_Sum 1.007591    2  594 0.36572064
## 4 MINI_AUD_Sum 0.816822    2  594 0.44232921

7) Mood Induction

Mood Induction Effectiveness Across Conditions

Paired T-tests of mood valence pre-post induction

T_dataframe
##    Variable     T_stat T_df    T_p_value    T_Mdiff
## t  Negative  13.831839  200 5.673723e-31  2.2985075
## t1  Neutral  -4.708193  200 4.664240e-06 -0.7412935
## t2 Positive -10.541120  194 7.885190e-21 -1.6307692
# Means and SDs of pre- (AG1) and post-induction (AG2) valence per condition.
# across() replaces the superseded summarise_all() and names the columns to
# summarise directly, so the grouping column no longer has to be re-added after
# a select() (which printed "Adding missing grouping variables: `Condition`").
Mood_df %>%
  group_by(Condition) %>%
  summarise(
    across(c(AG1_Valence, AG2_Valence), list(M = mean, SD = sd))
  ) %>%
  # Keep the previous column layout: both means first, then both SDs.
  select(Condition, ends_with("_M"), ends_with("_SD"))
## Adding missing grouping variables: `Condition`
## # A tibble: 3 × 5
##   Condition AG1_Valence_M AG2_Valence_M AG1_Valence_SD AG2_Valence_SD
##   <fct>             <dbl>         <dbl>          <dbl>          <dbl>
## 1 Negative           5.64          3.34           2.02           2.08
## 2 Neutral            5.70          6.44           2.08           2.03
## 3 Positive           5.33          6.96           2.08           1.82
# Per-condition diagnostics for the pre/post valence change score.
Mood_df %>%
  group_by(Condition) %>%
  summarise(
    # Ratio of pre- to post-induction SDs. Ratios around 1 suggest the most
    # rapid decline in change-score reliability per Gollwitzer et al. (2014).
    SD_Ratio = sd(AG1_Valence) / sd(AG2_Valence),
    # Pre/post correlation (a plain correlation despite the column name). Lower
    # correlations suggest higher reliability coefficients per
    # Gollwitzer et al. (2014).
    Cor_Ratio = cor(AG1_Valence, AG2_Valence)
  )
## # A tibble: 3 × 3
##   Condition SD_Ratio Cor_Ratio
##   <fct>        <dbl>     <dbl>
## 1 Negative     0.971     0.338
## 2 Neutral      1.02      0.412
## 3 Positive     1.15      0.394

8) UPPS-P: Negative Urgency & Positive Urgency Info

UPPS-P Dataframe

glimpse(UPPSP_df)
## Rows: 597
## Columns: 66
## $ ID        <int> 23, 24, 27, 28, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, …
## $ Condition <fct> Neutral, Negative, Neutral, Negative, Negative, Neutral, Neg…
## $ UPPS_P_1  <dbl> 2, 3, 1, 1, 2, 2, 2, 2, 3, 1, 3, 4, 4, 4, 4, 2, 3, 1, 4, 1, …
## $ UPPS_P_2  <dbl> 2, 1, 2, 1, 2, 3, 1, 2, 1, 1, 1, 1, 2, 1, 4, 2, 3, 2, 2, 1, …
## $ UPPS_P_3  <dbl> 4, 3, 1, 3, 4, 4, 4, 3, 4, 3, 3, 4, 4, 3, 4, 3, 3, 3, 4, 2, …
## $ UPPS_P_4  <dbl> 1, 1, 1, 4, 3, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 2, 1, 3, 1, …
## $ UPPS_P_5  <dbl> 1, 1, 1, 4, 2, 1, 1, 3, 1, 1, 2, 1, 2, 2, 4, 1, 1, 1, 2, 1, …
## $ UPPS_P_6  <dbl> 2, 1, 1, 3, 1, 1, 1, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 1, …
## $ UPPS_P_7  <dbl> 3, 1, 3, 3, 3, 3, 1, 1, 2, 1, 3, 3, 4, 2, 4, 3, 3, 3, 3, 1, …
## $ UPPS_P_8  <dbl> 3, 2, 1, 4, 2, 4, 3, 2, 3, 3, 1, 4, 3, 3, 2, 3, 2, 4, 4, 1, …
## $ UPPS_P_9  <dbl> 1, 1, 3, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, …
## $ UPPS_P_10 <dbl> 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1, 4, 1, 1, 1, 3, 1, …
## $ UPPS_P_11 <dbl> 1, 1, 2, 4, 4, 2, 3, 1, 2, 2, 2, 1, 1, 1, 1, 3, 4, 1, 4, 1, …
## $ UPPS_P_12 <dbl> 3, 1, 2, 4, 3, 2, 1, 2, 2, 1, 4, 2, 1, 2, 3, 3, 3, 3, 2, 2, …
## $ UPPS_P_13 <dbl> 3, 3, 1, 2, 3, 3, 1, 3, 2, 1, 4, 3, 1, 4, 4, 4, 4, 3, 4, 3, …
## $ UPPS_P_14 <dbl> 2, 2, 2, 2, 1, 3, 2, 1, 2, 1, 1, 1, 3, 1, 4, 1, 2, 2, 3, 3, …
## $ UPPS_P_15 <dbl> 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1, 3, 1, 1, 1, 2, 1, …
## $ UPPS_P_16 <dbl> 2, 1, 1, 3, 1, 2, 1, 2, 3, 1, 2, 2, 2, 1, 3, 2, 3, 1, 4, 1, …
## $ UPPS_P_17 <dbl> 3, 1, 1, 3, 2, 3, 1, 1, 2, 4, 2, 1, 1, 2, 4, 3, 1, 1, 3, 1, …
## $ UPPS_P_18 <dbl> 4, 3, 2, 4, 3, 4, 4, 4, 3, 3, 3, 4, 3, 4, 2, 3, 3, 4, 4, 4, …
## $ UPPS_P_19 <dbl> 2, 2, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 3, …
## $ UPPS_P_20 <dbl> 1, 1, 1, 3, 3, 2, 1, 2, 1, 1, 2, 1, 3, 2, 3, 3, 1, 1, 3, 1, …
## $ UPPS_P_21 <dbl> 3, 2, 1, 3, 1, 1, 3, 2, 3, 1, 3, 2, 4, 1, 4, 2, 3, 3, 4, 2, …
## $ UPPS_P_22 <dbl> 2, 1, 1, 3, 3, 4, 1, 2, 1, 1, 1, 3, 4, 1, 4, 3, 3, 3, 3, 1, …
## $ UPPS_P_23 <dbl> 4, 3, 1, 4, 4, 2, 1, 3, 3, 1, 3, 4, 4, 4, 4, 4, 2, 2, 4, 4, …
## $ UPPS_P_24 <dbl> 1, 1, 3, 3, 2, 3, 1, 3, 1, 2, 1, 1, 4, 1, 3, 3, 4, 4, 4, 1, …
## $ UPPS_P_25 <dbl> 1, 1, 1, 4, 4, 1, 1, 3, 1, 1, 2, 1, 3, 1, 3, 2, 1, 2, 3, 1, …
## $ UPPS_P_26 <dbl> 4, 2, 2, 3, 2, 4, 3, 4, 2, 3, 1, 4, 4, 4, 4, 2, 3, 4, 4, 4, …
## $ UPPS_P_27 <dbl> 1, 1, 2, 4, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 2, 3, 1, …
## $ UPPS_P_28 <dbl> 2, 1, 2, 4, 1, 1, 1, 1, 3, 1, 2, 1, 4, 1, 2, 2, 3, 1, 3, 1, …
## $ UPPS_P_29 <dbl> 1, 1, 1, 4, 2, 3, 1, 2, 1, 2, 1, 2, 3, 2, 4, 3, 4, 1, 3, 3, …
## $ UPPS_P_30 <dbl> 1, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 3, 1, 1, 3, 1, …
## $ UPPS_P_31 <dbl> 3, 3, 2, 2, 3, 4, 2, 2, 4, 2, 3, 4, 4, 4, 4, 3, 2, 4, 4, 3, …
## $ UPPS_P_32 <dbl> 2, 1, 3, 1, 3, 3, 2, 2, 2, 1, 2, 1, 4, 1, 1, 2, 2, 3, 2, 1, …
## $ UPPS_P_33 <dbl> 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 4, 1, 1, 2, 2, 2, 3, 1, …
## $ UPPS_P_34 <dbl> 2, 2, 2, 2, 3, 4, 1, 2, 2, 1, 2, 1, 3, 2, 3, 4, 2, 3, 1, 3, …
## $ UPPS_P_35 <dbl> 1, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 3, 1, …
## $ UPPS_P_36 <dbl> 1, 2, 1, 1, 3, 4, 2, 3, 2, 1, 1, 4, 3, 4, 4, 2, 1, 4, 4, 4, …
## $ UPPS_P_37 <dbl> 2, 1, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 4, 1, 2, 1, 3, 1, 2, 1, …
## $ UPPS_P_38 <dbl> 2, 2, 1, 3, 2, 2, 1, 2, 4, 1, 3, 3, 4, 3, 2, 2, 3, 1, 4, 1, …
## $ UPPS_P_39 <dbl> 2, 1, 2, 4, 4, 2, 2, 3, 2, 2, 2, 2, 4, 2, 2, 3, 4, 2, 2, 2, …
## $ UPPS_P_40 <dbl> 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 3, 1, …
## $ UPPS_P_41 <dbl> 3, 3, 1, 4, 3, 4, 1, 2, 3, 1, 3, 4, 4, 4, 2, 3, 4, 2, 4, 4, …
## $ UPPS_P_42 <dbl> 1, 2, 2, 3, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 3, 1, …
## $ UPPS_P_43 <dbl> 2, 2, 1, 2, 2, 2, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 4, 1, …
## $ UPPS_P_44 <dbl> 1, 1, 1, 4, 2, 3, 1, 2, 1, 1, 1, 1, 2, 2, 2, 4, 3, 1, 1, 1, …
## $ UPPS_P_45 <dbl> 3, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 1, 1, 3, 1, …
## $ UPPS_P_46 <dbl> 3, 4, 2, 1, 3, 4, 4, 4, 3, 3, 3, 4, 4, 4, 4, 3, 1, 4, 4, 4, …
## $ UPPS_P_47 <dbl> 2, 2, 4, 3, 3, 3, 2, 3, 3, 3, 4, 1, 4, 2, 3, 3, 3, 3, 2, 1, …
## $ UPPS_P_48 <dbl> 2, 1, 1, 3, 2, 1, 1, 2, 2, 1, 3, 2, 3, 1, 3, 2, 3, 1, 3, 1, …
## $ UPPS_P_49 <dbl> 1, 1, 1, 4, 2, 1, 1, 2, 1, 1, 1, 1, 4, 1, 3, 3, 1, 1, 3, 1, …
## $ UPPS_P_50 <dbl> 1, 2, 2, 2, 4, 4, 1, 2, 1, 1, 2, 3, 3, 3, 4, 2, 4, 3, 3, 3, …
## $ UPPS_P_51 <dbl> 3, 4, 3, 1, 4, 4, 4, 4, 4, 3, 1, 4, 4, 4, 1, 3, 4, 4, 4, 4, …
## $ UPPS_P_52 <dbl> 2, 1, 1, 2, 4, 1, 1, 2, 1, 1, 1, 1, 4, 1, 2, 2, 1, 1, 3, 1, …
## $ UPPS_P_53 <dbl> 2, 2, 2, 2, 3, 3, 3, 2, 2, 1, 2, 2, 4, 1, 4, 2, 3, 1, 2, 2, …
## $ UPPS_P_54 <dbl> 2, 1, 1, 4, 2, 2, 1, 2, 1, 1, 2, 1, 3, 1, 4, 3, 1, 1, 1, 1, …
## $ UPPS_P_55 <dbl> 2, 2, 2, 3, 2, 2, 1, 2, 3, 1, 2, 2, 2, 1, 3, 2, 2, 1, 3, 1, …
## $ UPPS_P_56 <dbl> 4, 3, 2, 4, 4, 4, 3, 4, 2, 1, 3, 4, 3, 4, 4, 2, 2, 4, 4, 4, …
## $ UPPS_P_57 <dbl> 2, 3, 3, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 2, 3, 2, 4, 2, 3, …
## $ UPPS_P_58 <dbl> 3, 1, 1, 2, 3, 3, 1, 2, 1, 2, 2, 3, 4, 2, 3, 3, 3, 3, 3, 3, …
## $ UPPS_P_59 <dbl> 2, 1, 1, 4, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 4, 3, 3, 3, 3, 1, …
## $ NU_Avg    <dbl> 2.083333, 1.250000, 1.666667, 2.833333, 2.833333, 3.083333, …
## $ PU_Avg    <dbl> 1.500000, 1.142857, 1.142857, 3.000000, 2.428571, 1.285714, …
## $ SS_Avg    <dbl> 3.250000, 2.916667, 1.583333, 2.750000, 3.166667, 3.750000, …
## $ LoPM_Avg  <dbl> 2.000000, 1.545455, 1.363636, 2.818182, 1.818182, 1.636364, …
## $ LoPER_Avg <dbl> 1.5, 1.4, 2.4, 2.3, 2.0, 2.1, 1.4, 1.6, 1.5, 1.4, 1.6, 1.1, …

Measure Missingness, Means, & SDs

UPPSP_df %>% 
  select(Condition, NU_Avg, PU_Avg) %>% 
  vis_miss()

# Mean, median and SD of Negative Urgency (NU_Avg) and Positive Urgency
# (PU_Avg) per condition, using only respondents with complete data on all
# three selected columns.
# across() replaces the superseded summarise_all().
UPPSP_df %>%
  select(Condition, NU_Avg, PU_Avg) %>%
  drop_na() %>%
  group_by(Condition) %>%
  summarise(
    across(c(NU_Avg, PU_Avg), list(M = mean, med = median, SD = sd))
  ) %>%
  # Keep the previous column layout: means, then medians, then SDs.
  select(Condition, ends_with("_M"), ends_with("_med"), ends_with("_SD"))
## # A tibble: 3 × 7
##   Condition NU_Avg_M PU_Avg_M NU_Avg_med PU_Avg_med NU_Avg_SD PU_Avg_SD
##   <fct>        <dbl>    <dbl>      <dbl>      <dbl>     <dbl>     <dbl>
## 1 Negative      2.23     1.83       2.25       1.79     0.629     0.569
## 2 Neutral       2.23     1.79       2.25       1.64     0.612     0.575
## 3 Positive      2.33     1.93       2.25       1.93     0.566     0.576

9) Joining Dataframes

# Build the analysis data set by successively left-joining each measure-specific
# data frame onto the demographics, matching on respondent ID and Condition.
# Left joins keep every row of the left-hand table; each intermediate result is
# stored under its own name.
Demo_Drug_df <- left_join(Demo_df, Drug_df, by = c("ID", "Condition"))
Demo_Drug_Mood_df <- left_join(Demo_Drug_df, Mood_df, by = c("ID", "Condition"))
Full_df <- left_join(Demo_Drug_Mood_df, UPPSP_df, by = c("ID", "Condition"))
glimpse(Full_df)
## Rows: 597
## Columns: 94
## $ ID                   <int> 23, 24, 27, 28, 47, 48, 49, 50, 51, 52, 53, 54, 5…
## $ Condition            <fct> Neutral, Negative, Neutral, Negative, Negative, N…
## $ Age                  <dbl> 22, 21, 21, 24, 20, 19, 19, 19, 20, 19, 19, 20, 2…
## $ SAB.f                <fct> Female, Male, Female, Female, Male, Male, Female,…
## $ Gender.f             <fct> Female, Male, Female, Non-binary, Male, Male, Fem…
## $ Sexual_Orientation.f <fct> Heterosexual, Heterosexual, Heterosexual, Asexual…
## $ Race_Ethnicity.f     <fct> Hispanic or Latino, White (non-Hispanic), White (…
## $ Student_Status.f     <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes,…
## $ Student_Year.f       <fct> Senior, Junior, Senior, Senior, Sophomore, Freshm…
## $ Marital_Status.f     <fct> Single, Single, Single, Married, Single, Single, …
## $ Employment.f         <fct> Unemployed, Employed 1-20 hours per week, Employe…
## $ Native_Language.f    <fct> English, English, English, English, English, Engl…
## $ AUDIT1.f             <fct> 2-4x/month, Monthly or less, 2-3x/week, 2-4x/mont…
## $ AUDIT2.f             <fct> 3-4, 1-2, 1-2, 3-4, 3-4, 3-4, 3-4, 5-6, 1-2, 1-2,…
## $ AUDIT3.f             <fct> < Monthly, Never, < Monthly, < Monthly, Never, Mo…
## $ AUDIT_Sum            <dbl> 6, 1, 6, 14, 3, 10, 4, 9, 1, 1, 3, 7, 8, 2, 20, 1…
## $ DUDIT_Sum            <dbl> 0, 9, 4, 24, 0, 3, 0, 0, 0, 0, 0, 3, 3, 0, 1, 0, …
## $ MINI_AUD_Sum         <dbl> 7, 0, 9, 6, 1, 1, 1, 3, 0, 0, 2, 4, 4, 2, 7, 0, 0…
## $ MINI_SUD_Sum         <dbl> 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 3, 0, 3, 0, 0…
## $ MINI_AUD_Dx          <fct> Severe, None, Severe, Severe, None, None, None, M…
## $ MINI_SUD_Dx          <fct> None, None, None, None, None, None, None, None, N…
## $ Date_Last_Drank      <date> 2020-03-25, 2020-03-31, 2020-03-31, 1901-01-01, …
## $ Favorite_Alcohol.f   <fct> Wine, Beer, Wine, Beer, Liquor/Spirits, Beer, Liq…
## $ Favorite_Caff.f      <fct> Coffee, Coffee, Coffee, Coffee, Coffee, Tea, Coff…
## $ AG1                  <dbl> 16, 42, 73, 77, 61, 62, 59, 60, 52, 60, 52, 62, 5…
## $ AG2                  <dbl> 41, 40, 73, 73, 56, 35, 41, 29, 52, 21, 62, 68, 3…
## $ AG1_Valence          <dbl> 7, 6, 1, 5, 7, 8, 5, 6, 7, 6, 7, 8, 8, 9, 5, 7, 7…
## $ AG1_Arousal          <dbl> 2, 5, 9, 9, 7, 7, 7, 7, 6, 7, 6, 7, 6, 6, 9, 7, 6…
## $ AG2_Valence          <dbl> 5, 4, 1, 1, 2, 8, 5, 2, 7, 3, 8, 5, 3, 9, 8, 4, 7…
## $ AG2_Arousal          <dbl> 5, 5, 9, 9, 7, 4, 5, 4, 6, 3, 7, 8, 5, 7, 7, 4, 8…
## $ UPPS_P_1             <dbl> 2, 3, 1, 1, 2, 2, 2, 2, 3, 1, 3, 4, 4, 4, 4, 2, 3…
## $ UPPS_P_2             <dbl> 2, 1, 2, 1, 2, 3, 1, 2, 1, 1, 1, 1, 2, 1, 4, 2, 3…
## $ UPPS_P_3             <dbl> 4, 3, 1, 3, 4, 4, 4, 3, 4, 3, 3, 4, 4, 3, 4, 3, 3…
## $ UPPS_P_4             <dbl> 1, 1, 1, 4, 3, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 2…
## $ UPPS_P_5             <dbl> 1, 1, 1, 4, 2, 1, 1, 3, 1, 1, 2, 1, 2, 2, 4, 1, 1…
## $ UPPS_P_6             <dbl> 2, 1, 1, 3, 1, 1, 1, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2…
## $ UPPS_P_7             <dbl> 3, 1, 3, 3, 3, 3, 1, 1, 2, 1, 3, 3, 4, 2, 4, 3, 3…
## $ UPPS_P_8             <dbl> 3, 2, 1, 4, 2, 4, 3, 2, 3, 3, 1, 4, 3, 3, 2, 3, 2…
## $ UPPS_P_9             <dbl> 1, 1, 3, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2…
## $ UPPS_P_10            <dbl> 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1, 4, 1, 1…
## $ UPPS_P_11            <dbl> 1, 1, 2, 4, 4, 2, 3, 1, 2, 2, 2, 1, 1, 1, 1, 3, 4…
## $ UPPS_P_12            <dbl> 3, 1, 2, 4, 3, 2, 1, 2, 2, 1, 4, 2, 1, 2, 3, 3, 3…
## $ UPPS_P_13            <dbl> 3, 3, 1, 2, 3, 3, 1, 3, 2, 1, 4, 3, 1, 4, 4, 4, 4…
## $ UPPS_P_14            <dbl> 2, 2, 2, 2, 1, 3, 2, 1, 2, 1, 1, 1, 3, 1, 4, 1, 2…
## $ UPPS_P_15            <dbl> 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1, 3, 1, 1…
## $ UPPS_P_16            <dbl> 2, 1, 1, 3, 1, 2, 1, 2, 3, 1, 2, 2, 2, 1, 3, 2, 3…
## $ UPPS_P_17            <dbl> 3, 1, 1, 3, 2, 3, 1, 1, 2, 4, 2, 1, 1, 2, 4, 3, 1…
## $ UPPS_P_18            <dbl> 4, 3, 2, 4, 3, 4, 4, 4, 3, 3, 3, 4, 3, 4, 2, 3, 3…
## $ UPPS_P_19            <dbl> 2, 2, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2…
## $ UPPS_P_20            <dbl> 1, 1, 1, 3, 3, 2, 1, 2, 1, 1, 2, 1, 3, 2, 3, 3, 1…
## $ UPPS_P_21            <dbl> 3, 2, 1, 3, 1, 1, 3, 2, 3, 1, 3, 2, 4, 1, 4, 2, 3…
## $ UPPS_P_22            <dbl> 2, 1, 1, 3, 3, 4, 1, 2, 1, 1, 1, 3, 4, 1, 4, 3, 3…
## $ UPPS_P_23            <dbl> 4, 3, 1, 4, 4, 2, 1, 3, 3, 1, 3, 4, 4, 4, 4, 4, 2…
## $ UPPS_P_24            <dbl> 1, 1, 3, 3, 2, 3, 1, 3, 1, 2, 1, 1, 4, 1, 3, 3, 4…
## $ UPPS_P_25            <dbl> 1, 1, 1, 4, 4, 1, 1, 3, 1, 1, 2, 1, 3, 1, 3, 2, 1…
## $ UPPS_P_26            <dbl> 4, 2, 2, 3, 2, 4, 3, 4, 2, 3, 1, 4, 4, 4, 4, 2, 3…
## $ UPPS_P_27            <dbl> 1, 1, 2, 4, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2…
## $ UPPS_P_28            <dbl> 2, 1, 2, 4, 1, 1, 1, 1, 3, 1, 2, 1, 4, 1, 2, 2, 3…
## $ UPPS_P_29            <dbl> 1, 1, 1, 4, 2, 3, 1, 2, 1, 2, 1, 2, 3, 2, 4, 3, 4…
## $ UPPS_P_30            <dbl> 1, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 3, 1…
## $ UPPS_P_31            <dbl> 3, 3, 2, 2, 3, 4, 2, 2, 4, 2, 3, 4, 4, 4, 4, 3, 2…
## $ UPPS_P_32            <dbl> 2, 1, 3, 1, 3, 3, 2, 2, 2, 1, 2, 1, 4, 1, 1, 2, 2…
## $ UPPS_P_33            <dbl> 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 4, 1, 1, 2, 2…
## $ UPPS_P_34            <dbl> 2, 2, 2, 2, 3, 4, 1, 2, 2, 1, 2, 1, 3, 2, 3, 4, 2…
## $ UPPS_P_35            <dbl> 1, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1…
## $ UPPS_P_36            <dbl> 1, 2, 1, 1, 3, 4, 2, 3, 2, 1, 1, 4, 3, 4, 4, 2, 1…
## $ UPPS_P_37            <dbl> 2, 1, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 4, 1, 2, 1, 3…
## $ UPPS_P_38            <dbl> 2, 2, 1, 3, 2, 2, 1, 2, 4, 1, 3, 3, 4, 3, 2, 2, 3…
## $ UPPS_P_39            <dbl> 2, 1, 2, 4, 4, 2, 2, 3, 2, 2, 2, 2, 4, 2, 2, 3, 4…
## $ UPPS_P_40            <dbl> 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1…
## $ UPPS_P_41            <dbl> 3, 3, 1, 4, 3, 4, 1, 2, 3, 1, 3, 4, 4, 4, 2, 3, 4…
## $ UPPS_P_42            <dbl> 1, 2, 2, 3, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2…
## $ UPPS_P_43            <dbl> 2, 2, 1, 2, 2, 2, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2…
## $ UPPS_P_44            <dbl> 1, 1, 1, 4, 2, 3, 1, 2, 1, 1, 1, 1, 2, 2, 2, 4, 3…
## $ UPPS_P_45            <dbl> 3, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 1…
## $ UPPS_P_46            <dbl> 3, 4, 2, 1, 3, 4, 4, 4, 3, 3, 3, 4, 4, 4, 4, 3, 1…
## $ UPPS_P_47            <dbl> 2, 2, 4, 3, 3, 3, 2, 3, 3, 3, 4, 1, 4, 2, 3, 3, 3…
## $ UPPS_P_48            <dbl> 2, 1, 1, 3, 2, 1, 1, 2, 2, 1, 3, 2, 3, 1, 3, 2, 3…
## $ UPPS_P_49            <dbl> 1, 1, 1, 4, 2, 1, 1, 2, 1, 1, 1, 1, 4, 1, 3, 3, 1…
## $ UPPS_P_50            <dbl> 1, 2, 2, 2, 4, 4, 1, 2, 1, 1, 2, 3, 3, 3, 4, 2, 4…
## $ UPPS_P_51            <dbl> 3, 4, 3, 1, 4, 4, 4, 4, 4, 3, 1, 4, 4, 4, 1, 3, 4…
## $ UPPS_P_52            <dbl> 2, 1, 1, 2, 4, 1, 1, 2, 1, 1, 1, 1, 4, 1, 2, 2, 1…
## $ UPPS_P_53            <dbl> 2, 2, 2, 2, 3, 3, 3, 2, 2, 1, 2, 2, 4, 1, 4, 2, 3…
## $ UPPS_P_54            <dbl> 2, 1, 1, 4, 2, 2, 1, 2, 1, 1, 2, 1, 3, 1, 4, 3, 1…
## $ UPPS_P_55            <dbl> 2, 2, 2, 3, 2, 2, 1, 2, 3, 1, 2, 2, 2, 1, 3, 2, 2…
## $ UPPS_P_56            <dbl> 4, 3, 2, 4, 4, 4, 3, 4, 2, 1, 3, 4, 3, 4, 4, 2, 2…
## $ UPPS_P_57            <dbl> 2, 3, 3, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 2, 3, 2…
## $ UPPS_P_58            <dbl> 3, 1, 1, 2, 3, 3, 1, 2, 1, 2, 2, 3, 4, 2, 3, 3, 3…
## $ UPPS_P_59            <dbl> 2, 1, 1, 4, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 4, 3, 3…
## $ NU_Avg               <dbl> 2.083333, 1.250000, 1.666667, 2.833333, 2.833333,…
## $ PU_Avg               <dbl> 1.500000, 1.142857, 1.142857, 3.000000, 2.428571,…
## $ SS_Avg               <dbl> 3.250000, 2.916667, 1.583333, 2.750000, 3.166667,…
## $ LoPM_Avg             <dbl> 2.000000, 1.545455, 1.363636, 2.818182, 1.818182,…
## $ LoPER_Avg            <dbl> 1.5, 1.4, 2.4, 2.3, 2.0, 2.1, 1.4, 1.6, 1.5, 1.4,…

10) Correlations Amongst Measures

Full_df %>% 
  select(NU_Avg, PU_Avg, MINI_AUD_Sum, AUDIT_Sum, AG1_Valence, AG2_Valence) %>%
  PerformanceAnalytics::chart.Correlation()

11) Writing data to csv

The code below is commented out ("hashed out") to prevent the CSV from being re-written every time the markdown is knit/published.

#Full_df %>% write_csv("/Users/noahwolkowicz/Desktop/CT/West Haven/Postdoc/Postdoc Research/Jenn & Noah Collab/Data/JN_Collab_6.16.22.csv")