0) Intro Notes:

-This html file illustrates initial data cleaning, primarily showing output. Several large code chunks have been hidden from the html file to improve readability.

1) Packages & R Markdown Setup

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(naniar)
library(gtsummary)

#Document-wide chunk defaults: echo the source code and include each chunk in the rendered file
knitr::opts_chunk$set(echo = TRUE, include = TRUE)

2) Data Read-In

#Raw survey export. The path is stored once so the header read and the data read
#below always point at the same file.
raw_data_path <- "/Users/noahwolkowicz/Desktop/CT/West Haven/Postdoc/Postdoc Research/Jenn & Noah Collab/Data/JN_Data_6.1.22.csv"

#Read only the first line of the file (header = FALSE, nrows = 1) so that its values
#can later be assigned as the column names of `df`.
headers <- read.csv(raw_data_path, skip = 0, header = FALSE, nrows = 1, as.is = TRUE)

#Skip the first two lines of the file; read_csv() then takes the third line as its
#(temporary) column names and everything after it as data.
df <- read_csv(raw_data_path, skip = 2)

## Rows: 892 Columns: 614
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (538): {"ImportId":"status"}, {"ImportId":"ipAddress"}, {"ImportId":"_r...
## dbl   (67): {"ImportId":"progress"}, {"ImportId":"duration"}, {"ImportId":"l...
## lgl    (6): {"ImportId":"finished"}, {"ImportId":"recipientLastName"}, {"Imp...
## dttm   (3): {"ImportId":"startDate","timeZone":"America/Denver"}, {"ImportId...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

#Replace the column names read_csv() picked up with the first-line names stored in `headers`
names(df) <- headers

Dimensions of Initial Data Set

dim(df)

## [1] 892 616

#892 people with 616 variables

3) Inattentive Responders

#Frequency table of participants' self-reported data-use preference
janitor::tabyl(df, Data_Use)

##                                                 Data_Use   n    percent
##  Do not use my data. I did not devote my full attention.  87 0.09753363
##                Use my data. I devoted my full attention. 708 0.79372197
##                                                     <NA>  97 0.10874439
##  valid_percent
##       0.109434
##       0.890566
##             NA

#Frequency table of attention-check outcomes (Failed vs. Passed)
janitor::tabyl(df, Failed_ATTN_Checks.f)

##  Failed_ATTN_Checks.f   n    percent valid_percent
##                Failed 168 0.18834081     0.1992883
##                Passed 675 0.75672646     0.8007117
##                  <NA>  49 0.05493274            NA

#Cross-tabulation of self-reported data-use preference (rows) by attention-check outcome (columns)
table(df$Data_Use, df$Failed_ATTN_Checks.f)

##                                                          
##                                                           Failed Passed
##   Do not use my data. I did not devote my full attention.     47     40
##   Use my data. I devoted my full attention.                  110    598

#Statistical assessment of significant differences in attentive responding across conditions
#NOTE(review): this test uses `Failed_ATTN_Checks`, whereas the tabulations and the exclusion filter in this
#section use `Failed_ATTN_Checks.f` -- confirm both columns encode the same pass/fail split.
chisq.test(df$Failed_ATTN_Checks, df$Condition)

## 
##  Pearson's Chi-squared test
## 
## data:  df$Failed_ATTN_Checks and df$Condition
## X-squared = 1.4779, df = 2, p-value = 0.4776

#Chi-square test of whether the self-reported data-use preference differs across conditions
chisq.test(df$Data_Use, df$Condition)

## 
##  Pearson's Chi-squared test
## 
## data:  df$Data_Use and df$Condition
## X-squared = 0.86774, df = 2, p-value = 0.648

#Keep only participants who passed the attention checks AND asked for their data to be used
df <- df %>%
  filter(
    Failed_ATTN_Checks.f == "Passed",
    Data_Use == "Use my data. I devoted my full attention."
  )

4) People Who are Missing (have “NA” for) Condition Variable

#### Checking if anyone is missing condition assignment ####
#Dimensions of the subset of rows with no condition recorded (1 person missing/not assigned to a condition)
df %>% filter(is.na(Condition)) %>% dim()

## [1]   1 624

#Pull the complete row(s) for anyone without a condition assignment. All columns must be kept:
#the cross-tab below reads the three writing-prompt columns, which a single-column subset would not contain.
missing_condition <- df[is.na(df$Condition), ]
#Cross-tabulate the three writing-prompt responses for those participants. table() leaves out NA
#values by default, so an empty table means no non-missing writing responses were found.
table(missing_condition$PMI_Writing, missing_condition$`Neutral Writing`, missing_condition$NMI_Writing)

## < table of extent 0 x 0 x 0 >

#^Code above verifies that my initial coding to create a condition variable didn't exclude anyone
#Separate data frame holding only this participant, for a closer look
missing_condition <- filter(df, is.na(Condition))

#Proportion of all cells that are NA for this participant (they're missing ~25% of their data)
mean(is.na(missing_condition))

## [1] 0.2532051

#Percent missing for each variable in this participant's row
miss_cond_vars <- select(naniar::miss_var_summary(missing_condition), pct_miss)
#Histogram of those percentages: the variables they're missing are missing 100% of the items
hist(miss_cond_vars$pct_miss)

dim(df)

## [1] 598 624

#^Code above confirms everyone in dataset now was assigned to/completed a mood induction condition

So what’s going on with this one person who’s missing their condition variable

but appears to have completed the study?

#Hard to know for sure, but scrolling through this person's actual data file, it
#appears that they started the study and went through almost everything up to the
#mood induction. They were assigned neutral but didn't type anything and subsequently went on to 
#complete the rest of the measures. Because they didn't do any of the condition writing,
#I'm not sure we could argue they would be from the same post-induction "population"
#as folks who were exposed to the condition. Opting to remove them.

#Drop the participant without a condition assignment, then confirm that no NA conditions remain
df <- filter(df, !is.na(Condition))
sum(is.na(df$Condition))

## [1] 0

dim(df)

## [1] 597 624

5) Demographics

Demographics Missingness & Table

#### Missingness in Demographic Data ####
#Keep only the participants who have at least one missing demographic value.
#A row-wise test is required here: `filter(anyNA(.))` evaluates the whole data frame at once and
#returns a single TRUE/FALSE, which keeps either every row or no rows.
Missing_Demo_df <- Demo_df %>%
  filter(if_any(everything(), is.na)) %>%
  arrange(ID)
#Visualise which variables are missing for those participants
vis_miss(Missing_Demo_df)

## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

Demo_Total_Table

## Warning: The `fmt_missing()` function is deprecated and will soon be removed
## * Use the `sub_missing()` function instead

Characteristic	Full Sample	By Condition
Characteristic	N = 597¹	Negative, N = 201¹	Neutral, N = 201¹	Positive, N = 195¹
Age	M(SD)=19.33(1.93)	M(SD)=19.50(2.12)	M(SD)=19.27(1.67)	M(SD)=19.21(1.96)
Sex-at-Birth
Female	344 (58%)	109 (54%)	134 (67%)	101 (52%)
Male	253 (42%)	92 (46%)	67 (33%)	94 (48%)
Gender
Female	343 (57%)	107 (53%)	135 (67%)	101 (52%)
Male	250 (42%)	91 (45%)	66 (33%)	93 (48%)
Non-binary	4 (0.7%)	3 (1.5%)	0 (0%)	1 (0.5%)
Sexual Orientation
Asexual	7 (1.2%)	4 (2.0%)	1 (0.5%)	2 (1.0%)
Bisexual	34 (5.7%)	7 (3.5%)	11 (5.5%)	16 (8.2%)
Heterosexual	543 (91%)	185 (92%)	185 (92%)	173 (89%)
Homosexual	13 (2.2%)	5 (2.5%)	4 (2.0%)	4 (2.1%)
Race/Ethnicity
American Indian or Alaska Native	6 (1.0%)	3 (1.5%)	3 (1.5%)	0 (0%)
Asian	12 (2.0%)	3 (1.5%)	6 (3.0%)	3 (1.6%)
Black or African American	32 (5.4%)	11 (5.5%)	10 (5.0%)	11 (5.7%)
Hispanic or Latino	47 (7.9%)	17 (8.5%)	16 (8.0%)	14 (7.3%)
Middle Eastern	3 (0.5%)	0 (0%)	1 (0.5%)	2 (1.0%)
Multiracial	16 (2.7%)	6 (3.0%)	5 (2.5%)	5 (2.6%)
White (non-Hispanic)	479 (81%)	161 (80%)	160 (80%)	158 (82%)
Student Status
No	1 (0.2%)	0 (0%)	0 (0%)	1 (0.5%)
Yes	596 (100%)	201 (100%)	201 (100%)	194 (99%)
Student Year
Freshman	366 (61%)	113 (56%)	119 (59%)	134 (69%)
Junior	52 (8.7%)	18 (9.0%)	22 (11%)	12 (6.2%)
Senior	41 (6.9%)	19 (9.5%)	13 (6.5%)	9 (4.6%)
Sophomore	138 (23%)	51 (25%)	47 (23%)	40 (21%)
¹ M(SD)=Mean(SD); n (%)

Demographic Statistics

#Assemble the demographic test summary by joining the three result tables on Variable,
#ordered from smallest to largest p-value
Demo_Chi_df %>%
  left_join(Demo_Chi_Stat, by = "Variable") %>%
  left_join(Demo_Chi_p, by = "Variable") %>%
  arrange(p_value)

## # A tibble: 9 × 4
##   Variable                df `Chi_Square/F_Value` p_value
##   <chr>                <int>                <dbl>   <dbl>
## 1 SAB.f                    2                10.4  0.00554
## 2 Gender.f                 4                14.5  0.00584
## 3 Student_Year.f           6                 9.83 0.132  
## 4 Marital_Status.f         6                 8.03 0.236  
## 5 Employment.f             6                 6.67 0.353  
## 6 Student_Status.f         2                 2.06 0.356  
## 7 Sexual_Orientation.f     6                 6.16 0.406  
## 8 Native_Language.f        8                 8.26 0.408  
## 9 Race_Ethnicity.f        12                 6.85 0.868

#One-way ANOVA assessing Age differences according to condition
age_by_condition_aov <- aov(Age ~ Condition, data = Demo_df)
summary(age_by_condition_aov)

##              Df Sum Sq Mean Sq F value Pr(>F)
## Condition     2    9.6   4.782   1.288  0.276
## Residuals   593 2201.3   3.712               
## 1 observation deleted due to missingness

6) Substance Use

Substance Use Missingness & Table

#Missingness plot for the substance use variables
vis_miss(Drug_df)

Drug_Total_Table

## Warning: The `fmt_missing()` function is deprecated and will soon be removed
## * Use the `sub_missing()` function instead

Characteristic	Full Sample	By Condition
Characteristic	N = 597¹	Negative, N = 201¹	Neutral, N = 201¹	Positive, N = 195¹
Drinking Frequency
Never	169 (28%)	62 (31%)	52 (26%)	55 (28%)
Monthly or less	171 (29%)	65 (32%)	58 (29%)	48 (25%)
2-4x/month	129 (22%)	33 (16%)	48 (24%)	48 (25%)
2-3x/week	106 (18%)	32 (16%)	38 (19%)	36 (18%)
4+ x/week	22 (3.7%)	9 (4.5%)	5 (2.5%)	8 (4.1%)
Drinking Quantity
1-2	242 (46%)	86 (49%)	81 (47%)	75 (43%)
3-4	151 (29%)	46 (26%)	53 (30%)	52 (30%)
5-6	81 (15%)	28 (16%)	29 (17%)	24 (14%)
7-9	40 (7.6%)	12 (6.9%)	8 (4.6%)	20 (11%)
10+	10 (1.9%)	3 (1.7%)	3 (1.7%)	4 (2.3%)
Binge Drinking Frequency
Never	323 (54%)	110 (55%)	115 (57%)	98 (50%)
< Monthly	141 (24%)	49 (24%)	46 (23%)	46 (24%)
Monthly	78 (13%)	24 (12%)	27 (13%)	27 (14%)
Weekly	54 (9.0%)	17 (8.5%)	13 (6.5%)	24 (12%)
Daily or ~Daily	1 (0.2%)	1 (0.5%)	0 (0%)	0 (0%)
AUDIT Total	M(SD)=4.8(5.2)	M(SD)=4.5(5.0)	M(SD)=4.5(5.0)	M(SD)=5.3(5.6)
DUDIT_Total	M(SD)=1.9(4.3)	M(SD)=1.6(4.0)	M(SD)=2.2(5.3)	M(SD)=1.7(3.4)
AUD Criteria Endorsed	M(SD)=1.53(2.05)	M(SD)=1.39(1.97)	M(SD)=1.65(2.20)	M(SD)=1.56(1.99)
SUD Criteria Endorsed	M(SD)=0.97(2.07)	M(SD)=0.70(1.47)	M(SD)=1.10(2.36)	M(SD)=1.10(2.25)
AUD Diagnostic Status
Mild	128 (21%)	29 (14%)	42 (21%)	57 (29%)
Moderate	67 (11%)	25 (12%)	25 (12%)	17 (8.7%)
None	368 (62%)	137 (68%)	121 (60%)	110 (56%)
Severe	34 (5.7%)	10 (5.0%)	13 (6.5%)	11 (5.6%)
SUD Diagnostic Status
Mild	64 (11%)	17 (8.5%)	19 (9.5%)	28 (14%)
Moderate	28 (4.7%)	12 (6.0%)	10 (5.0%)	6 (3.1%)
None	477 (80%)	169 (84%)	159 (79%)	149 (76%)
Severe	28 (4.7%)	3 (1.5%)	13 (6.5%)	12 (6.2%)
¹ n (%); M(SD)=Mean(SD)

#Assemble the substance use test summary by joining the three result tables on Variable,
#ordered from smallest to largest p-value
Drug_Chi_df %>%
  left_join(Drug_Chi_Stat, by = "Variable") %>%
  left_join(Drug_Chi_p, by = "Variable") %>%
  arrange(p_value)

## # A tibble: 7 × 4
##   Variable              df `Chi_Square/F_Value` p_value
##   <chr>              <int>                <dbl>   <dbl>
## 1 MINI_AUD_Dx            6                14.5   0.0246
## 2 MINI_SUD_Dx            6                12.9   0.0452
## 3 Favorite_Caff.f        8                13.6   0.0919
## 4 AUDIT1.f               8                 8.55  0.382 
## 5 AUDIT2.f               8                 7.64  0.470 
## 6 AUDIT3.f               8                 7.13  0.523 
## 7 Favorite_Alcohol.f     6                 2.22  0.898

#Stack the four one-row ANOVA summaries and order them by p-value
arrange(
  rbind(AUDITSum_aov, DUDITSum_aov, MINIAUDSum_aov, MINISUDSum_aov),
  p_value
)

##       Variable  F_value df_n df_d    p_value
## 1 MINI_SUD_Sum 2.595737    2  594 0.07543683
## 2    AUDIT_Sum 1.328669    2  594 0.26561531
## 3    DUDIT_Sum 1.007591    2  594 0.36572064
## 4 MINI_AUD_Sum 0.816822    2  594 0.44232921

7) Mood Induction

Mood Induction Effectiveness Across Conditions

Paired T-tests of mood valence pre-post induction

T_dataframe

##    Variable     T_stat T_df    T_p_value    T_Mdiff
## t  Negative  13.831839  200 5.673723e-31  2.2985075
## t1  Neutral  -4.708193  200 4.664240e-06 -0.7412935
## t2 Positive -10.541120  194 7.885190e-21 -1.6307692

#Means and SD for each mood induction
#Computes the mean (M) and standard deviation (SD) of AG1_Valence and AG2_Valence within each condition.
#(Presumably AG1 = pre-induction and AG2 = post-induction rating -- confirm.)
#select() omits the grouping column, so dplyr adds it back and prints the
#"Adding missing grouping variables" message.
Mood_df %>% 
  group_by(Condition) %>% 
  select(AG1_Valence, AG2_Valence) %>% 
  summarise_all(list(M = mean, SD = sd))

## Adding missing grouping variables: `Condition`

## # A tibble: 3 × 5
##   Condition AG1_Valence_M AG2_Valence_M AG1_Valence_SD AG2_Valence_SD
##   <fct>             <dbl>         <dbl>          <dbl>          <dbl>
## 1 Negative           5.64          3.34           2.02           2.08
## 2 Neutral            5.70          6.44           2.08           2.03
## 3 Positive           5.33          6.96           2.08           1.82

#Per-condition diagnostics relevant to the reliability of valence change scores
Mood_df %>% 
  group_by(Condition) %>% 
  summarise(SD_Ratio = sd(AG1_Valence)/sd(AG2_Valence), #Ratios around 1 suggest the most rapid decline in change-score reliability per Gollwitzer et al. (2014)
            Cor_Ratio = cor(AG1_Valence, AG2_Valence))  #Correlation between the two valence ratings (a correlation, not a ratio, despite the column name). Lower correlations suggest higher reliability coefficients per Gollwitzer et al. (2014)

## # A tibble: 3 × 3
##   Condition SD_Ratio Cor_Ratio
##   <fct>        <dbl>     <dbl>
## 1 Negative     0.971     0.338
## 2 Neutral      1.02      0.412
## 3 Positive     1.15      0.394

8) UPPS-P: Negative Urgency & Positive Urgency Info

UPPS-P Dataframe

glimpse(UPPSP_df)

## Rows: 597
## Columns: 66
## $ ID        <int> 23, 24, 27, 28, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, …
## $ Condition <fct> Neutral, Negative, Neutral, Negative, Negative, Neutral, Neg…
## $ UPPS_P_1  <dbl> 2, 3, 1, 1, 2, 2, 2, 2, 3, 1, 3, 4, 4, 4, 4, 2, 3, 1, 4, 1, …
## $ UPPS_P_2  <dbl> 2, 1, 2, 1, 2, 3, 1, 2, 1, 1, 1, 1, 2, 1, 4, 2, 3, 2, 2, 1, …
## $ UPPS_P_3  <dbl> 4, 3, 1, 3, 4, 4, 4, 3, 4, 3, 3, 4, 4, 3, 4, 3, 3, 3, 4, 2, …
## $ UPPS_P_4  <dbl> 1, 1, 1, 4, 3, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 2, 1, 3, 1, …
## $ UPPS_P_5  <dbl> 1, 1, 1, 4, 2, 1, 1, 3, 1, 1, 2, 1, 2, 2, 4, 1, 1, 1, 2, 1, …
## $ UPPS_P_6  <dbl> 2, 1, 1, 3, 1, 1, 1, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 1, …
## $ UPPS_P_7  <dbl> 3, 1, 3, 3, 3, 3, 1, 1, 2, 1, 3, 3, 4, 2, 4, 3, 3, 3, 3, 1, …
## $ UPPS_P_8  <dbl> 3, 2, 1, 4, 2, 4, 3, 2, 3, 3, 1, 4, 3, 3, 2, 3, 2, 4, 4, 1, …
## $ UPPS_P_9  <dbl> 1, 1, 3, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, …
## $ UPPS_P_10 <dbl> 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1, 4, 1, 1, 1, 3, 1, …
## $ UPPS_P_11 <dbl> 1, 1, 2, 4, 4, 2, 3, 1, 2, 2, 2, 1, 1, 1, 1, 3, 4, 1, 4, 1, …
## $ UPPS_P_12 <dbl> 3, 1, 2, 4, 3, 2, 1, 2, 2, 1, 4, 2, 1, 2, 3, 3, 3, 3, 2, 2, …
## $ UPPS_P_13 <dbl> 3, 3, 1, 2, 3, 3, 1, 3, 2, 1, 4, 3, 1, 4, 4, 4, 4, 3, 4, 3, …
## $ UPPS_P_14 <dbl> 2, 2, 2, 2, 1, 3, 2, 1, 2, 1, 1, 1, 3, 1, 4, 1, 2, 2, 3, 3, …
## $ UPPS_P_15 <dbl> 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1, 3, 1, 1, 1, 2, 1, …
## $ UPPS_P_16 <dbl> 2, 1, 1, 3, 1, 2, 1, 2, 3, 1, 2, 2, 2, 1, 3, 2, 3, 1, 4, 1, …
## $ UPPS_P_17 <dbl> 3, 1, 1, 3, 2, 3, 1, 1, 2, 4, 2, 1, 1, 2, 4, 3, 1, 1, 3, 1, …
## $ UPPS_P_18 <dbl> 4, 3, 2, 4, 3, 4, 4, 4, 3, 3, 3, 4, 3, 4, 2, 3, 3, 4, 4, 4, …
## $ UPPS_P_19 <dbl> 2, 2, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 3, …
## $ UPPS_P_20 <dbl> 1, 1, 1, 3, 3, 2, 1, 2, 1, 1, 2, 1, 3, 2, 3, 3, 1, 1, 3, 1, …
## $ UPPS_P_21 <dbl> 3, 2, 1, 3, 1, 1, 3, 2, 3, 1, 3, 2, 4, 1, 4, 2, 3, 3, 4, 2, …
## $ UPPS_P_22 <dbl> 2, 1, 1, 3, 3, 4, 1, 2, 1, 1, 1, 3, 4, 1, 4, 3, 3, 3, 3, 1, …
## $ UPPS_P_23 <dbl> 4, 3, 1, 4, 4, 2, 1, 3, 3, 1, 3, 4, 4, 4, 4, 4, 2, 2, 4, 4, …
## $ UPPS_P_24 <dbl> 1, 1, 3, 3, 2, 3, 1, 3, 1, 2, 1, 1, 4, 1, 3, 3, 4, 4, 4, 1, …
## $ UPPS_P_25 <dbl> 1, 1, 1, 4, 4, 1, 1, 3, 1, 1, 2, 1, 3, 1, 3, 2, 1, 2, 3, 1, …
## $ UPPS_P_26 <dbl> 4, 2, 2, 3, 2, 4, 3, 4, 2, 3, 1, 4, 4, 4, 4, 2, 3, 4, 4, 4, …
## $ UPPS_P_27 <dbl> 1, 1, 2, 4, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 2, 3, 1, …
## $ UPPS_P_28 <dbl> 2, 1, 2, 4, 1, 1, 1, 1, 3, 1, 2, 1, 4, 1, 2, 2, 3, 1, 3, 1, …
## $ UPPS_P_29 <dbl> 1, 1, 1, 4, 2, 3, 1, 2, 1, 2, 1, 2, 3, 2, 4, 3, 4, 1, 3, 3, …
## $ UPPS_P_30 <dbl> 1, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 3, 1, 1, 3, 1, …
## $ UPPS_P_31 <dbl> 3, 3, 2, 2, 3, 4, 2, 2, 4, 2, 3, 4, 4, 4, 4, 3, 2, 4, 4, 3, …
## $ UPPS_P_32 <dbl> 2, 1, 3, 1, 3, 3, 2, 2, 2, 1, 2, 1, 4, 1, 1, 2, 2, 3, 2, 1, …
## $ UPPS_P_33 <dbl> 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 4, 1, 1, 2, 2, 2, 3, 1, …
## $ UPPS_P_34 <dbl> 2, 2, 2, 2, 3, 4, 1, 2, 2, 1, 2, 1, 3, 2, 3, 4, 2, 3, 1, 3, …
## $ UPPS_P_35 <dbl> 1, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 3, 1, …
## $ UPPS_P_36 <dbl> 1, 2, 1, 1, 3, 4, 2, 3, 2, 1, 1, 4, 3, 4, 4, 2, 1, 4, 4, 4, …
## $ UPPS_P_37 <dbl> 2, 1, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 4, 1, 2, 1, 3, 1, 2, 1, …
## $ UPPS_P_38 <dbl> 2, 2, 1, 3, 2, 2, 1, 2, 4, 1, 3, 3, 4, 3, 2, 2, 3, 1, 4, 1, …
## $ UPPS_P_39 <dbl> 2, 1, 2, 4, 4, 2, 2, 3, 2, 2, 2, 2, 4, 2, 2, 3, 4, 2, 2, 2, …
## $ UPPS_P_40 <dbl> 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 3, 1, …
## $ UPPS_P_41 <dbl> 3, 3, 1, 4, 3, 4, 1, 2, 3, 1, 3, 4, 4, 4, 2, 3, 4, 2, 4, 4, …
## $ UPPS_P_42 <dbl> 1, 2, 2, 3, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 3, 1, …
## $ UPPS_P_43 <dbl> 2, 2, 1, 2, 2, 2, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 4, 1, …
## $ UPPS_P_44 <dbl> 1, 1, 1, 4, 2, 3, 1, 2, 1, 1, 1, 1, 2, 2, 2, 4, 3, 1, 1, 1, …
## $ UPPS_P_45 <dbl> 3, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 1, 1, 3, 1, …
## $ UPPS_P_46 <dbl> 3, 4, 2, 1, 3, 4, 4, 4, 3, 3, 3, 4, 4, 4, 4, 3, 1, 4, 4, 4, …
## $ UPPS_P_47 <dbl> 2, 2, 4, 3, 3, 3, 2, 3, 3, 3, 4, 1, 4, 2, 3, 3, 3, 3, 2, 1, …
## $ UPPS_P_48 <dbl> 2, 1, 1, 3, 2, 1, 1, 2, 2, 1, 3, 2, 3, 1, 3, 2, 3, 1, 3, 1, …
## $ UPPS_P_49 <dbl> 1, 1, 1, 4, 2, 1, 1, 2, 1, 1, 1, 1, 4, 1, 3, 3, 1, 1, 3, 1, …
## $ UPPS_P_50 <dbl> 1, 2, 2, 2, 4, 4, 1, 2, 1, 1, 2, 3, 3, 3, 4, 2, 4, 3, 3, 3, …
## $ UPPS_P_51 <dbl> 3, 4, 3, 1, 4, 4, 4, 4, 4, 3, 1, 4, 4, 4, 1, 3, 4, 4, 4, 4, …
## $ UPPS_P_52 <dbl> 2, 1, 1, 2, 4, 1, 1, 2, 1, 1, 1, 1, 4, 1, 2, 2, 1, 1, 3, 1, …
## $ UPPS_P_53 <dbl> 2, 2, 2, 2, 3, 3, 3, 2, 2, 1, 2, 2, 4, 1, 4, 2, 3, 1, 2, 2, …
## $ UPPS_P_54 <dbl> 2, 1, 1, 4, 2, 2, 1, 2, 1, 1, 2, 1, 3, 1, 4, 3, 1, 1, 1, 1, …
## $ UPPS_P_55 <dbl> 2, 2, 2, 3, 2, 2, 1, 2, 3, 1, 2, 2, 2, 1, 3, 2, 2, 1, 3, 1, …
## $ UPPS_P_56 <dbl> 4, 3, 2, 4, 4, 4, 3, 4, 2, 1, 3, 4, 3, 4, 4, 2, 2, 4, 4, 4, …
## $ UPPS_P_57 <dbl> 2, 3, 3, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 2, 3, 2, 4, 2, 3, …
## $ UPPS_P_58 <dbl> 3, 1, 1, 2, 3, 3, 1, 2, 1, 2, 2, 3, 4, 2, 3, 3, 3, 3, 3, 3, …
## $ UPPS_P_59 <dbl> 2, 1, 1, 4, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 4, 3, 3, 3, 3, 1, …
## $ NU_Avg    <dbl> 2.083333, 1.250000, 1.666667, 2.833333, 2.833333, 3.083333, …
## $ PU_Avg    <dbl> 1.500000, 1.142857, 1.142857, 3.000000, 2.428571, 1.285714, …
## $ SS_Avg    <dbl> 3.250000, 2.916667, 1.583333, 2.750000, 3.166667, 3.750000, …
## $ LoPM_Avg  <dbl> 2.000000, 1.545455, 1.363636, 2.818182, 1.818182, 1.636364, …
## $ LoPER_Avg <dbl> 1.5, 1.4, 2.4, 2.3, 2.0, 2.1, 1.4, 1.6, 1.5, 1.4, 1.6, 1.1, …

Measure Missingness, Means, & SDs

#Missingness plot restricted to condition and the two urgency subscale averages
vis_miss(select(UPPSP_df, Condition, NU_Avg, PU_Avg))

#Mean (M), median (med), and SD of NU_Avg and PU_Avg within each condition.
#drop_na() first removes any participant with a missing value in Condition, NU_Avg, or PU_Avg,
#so every statistic is computed on the same complete cases.
UPPSP_df %>% 
  select(Condition, NU_Avg, PU_Avg) %>% 
  drop_na() %>% 
  group_by(Condition) %>% 
  summarise_all(list(M = mean, med = median, SD = sd))

## # A tibble: 3 × 7
##   Condition NU_Avg_M PU_Avg_M NU_Avg_med PU_Avg_med NU_Avg_SD PU_Avg_SD
##   <fct>        <dbl>    <dbl>      <dbl>      <dbl>     <dbl>     <dbl>
## 1 Negative      2.23     1.83       2.25       1.79     0.629     0.569
## 2 Neutral       2.23     1.79       2.25       1.64     0.612     0.575
## 3 Positive      2.33     1.93       2.25       1.93     0.566     0.576

9) Joining Dataframes

#Merge the four participant-level tables step by step, matching on ID and Condition.
#Left joins keep every row of the table on the left-hand side at each step.
join_keys <- c("ID", "Condition")
Demo_Drug_df <- Demo_df %>% left_join(Drug_df, by = join_keys)
Demo_Drug_Mood_df <- Demo_Drug_df %>% left_join(Mood_df, by = join_keys)
Full_df <- Demo_Drug_Mood_df %>% left_join(UPPSP_df, by = join_keys)
#Quick structural check of the combined data
glimpse(Full_df)

## Rows: 597
## Columns: 94
## $ ID                   <int> 23, 24, 27, 28, 47, 48, 49, 50, 51, 52, 53, 54, 5…
## $ Condition            <fct> Neutral, Negative, Neutral, Negative, Negative, N…
## $ Age                  <dbl> 22, 21, 21, 24, 20, 19, 19, 19, 20, 19, 19, 20, 2…
## $ SAB.f                <fct> Female, Male, Female, Female, Male, Male, Female,…
## $ Gender.f             <fct> Female, Male, Female, Non-binary, Male, Male, Fem…
## $ Sexual_Orientation.f <fct> Heterosexual, Heterosexual, Heterosexual, Asexual…
## $ Race_Ethnicity.f     <fct> Hispanic or Latino, White (non-Hispanic), White (…
## $ Student_Status.f     <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes,…
## $ Student_Year.f       <fct> Senior, Junior, Senior, Senior, Sophomore, Freshm…
## $ Marital_Status.f     <fct> Single, Single, Single, Married, Single, Single, …
## $ Employment.f         <fct> Unemployed, Employed 1-20 hours per week, Employe…
## $ Native_Language.f    <fct> English, English, English, English, English, Engl…
## $ AUDIT1.f             <fct> 2-4x/month, Monthly or less, 2-3x/week, 2-4x/mont…
## $ AUDIT2.f             <fct> 3-4, 1-2, 1-2, 3-4, 3-4, 3-4, 3-4, 5-6, 1-2, 1-2,…
## $ AUDIT3.f             <fct> < Monthly, Never, < Monthly, < Monthly, Never, Mo…
## $ AUDIT_Sum            <dbl> 6, 1, 6, 14, 3, 10, 4, 9, 1, 1, 3, 7, 8, 2, 20, 1…
## $ DUDIT_Sum            <dbl> 0, 9, 4, 24, 0, 3, 0, 0, 0, 0, 0, 3, 3, 0, 1, 0, …
## $ MINI_AUD_Sum         <dbl> 7, 0, 9, 6, 1, 1, 1, 3, 0, 0, 2, 4, 4, 2, 7, 0, 0…
## $ MINI_SUD_Sum         <dbl> 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 3, 0, 3, 0, 0…
## $ MINI_AUD_Dx          <fct> Severe, None, Severe, Severe, None, None, None, M…
## $ MINI_SUD_Dx          <fct> None, None, None, None, None, None, None, None, N…
## $ Date_Last_Drank      <date> 2020-03-25, 2020-03-31, 2020-03-31, 1901-01-01, …
## $ Favorite_Alcohol.f   <fct> Wine, Beer, Wine, Beer, Liquor/Spirits, Beer, Liq…
## $ Favorite_Caff.f      <fct> Coffee, Coffee, Coffee, Coffee, Coffee, Tea, Coff…
## $ AG1                  <dbl> 16, 42, 73, 77, 61, 62, 59, 60, 52, 60, 52, 62, 5…
## $ AG2                  <dbl> 41, 40, 73, 73, 56, 35, 41, 29, 52, 21, 62, 68, 3…
## $ AG1_Valence          <dbl> 7, 6, 1, 5, 7, 8, 5, 6, 7, 6, 7, 8, 8, 9, 5, 7, 7…
## $ AG1_Arousal          <dbl> 2, 5, 9, 9, 7, 7, 7, 7, 6, 7, 6, 7, 6, 6, 9, 7, 6…
## $ AG2_Valence          <dbl> 5, 4, 1, 1, 2, 8, 5, 2, 7, 3, 8, 5, 3, 9, 8, 4, 7…
## $ AG2_Arousal          <dbl> 5, 5, 9, 9, 7, 4, 5, 4, 6, 3, 7, 8, 5, 7, 7, 4, 8…
## $ UPPS_P_1             <dbl> 2, 3, 1, 1, 2, 2, 2, 2, 3, 1, 3, 4, 4, 4, 4, 2, 3…
## $ UPPS_P_2             <dbl> 2, 1, 2, 1, 2, 3, 1, 2, 1, 1, 1, 1, 2, 1, 4, 2, 3…
## $ UPPS_P_3             <dbl> 4, 3, 1, 3, 4, 4, 4, 3, 4, 3, 3, 4, 4, 3, 4, 3, 3…
## $ UPPS_P_4             <dbl> 1, 1, 1, 4, 3, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 2…
## $ UPPS_P_5             <dbl> 1, 1, 1, 4, 2, 1, 1, 3, 1, 1, 2, 1, 2, 2, 4, 1, 1…
## $ UPPS_P_6             <dbl> 2, 1, 1, 3, 1, 1, 1, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2…
## $ UPPS_P_7             <dbl> 3, 1, 3, 3, 3, 3, 1, 1, 2, 1, 3, 3, 4, 2, 4, 3, 3…
## $ UPPS_P_8             <dbl> 3, 2, 1, 4, 2, 4, 3, 2, 3, 3, 1, 4, 3, 3, 2, 3, 2…
## $ UPPS_P_9             <dbl> 1, 1, 3, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2…
## $ UPPS_P_10            <dbl> 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1, 4, 1, 1…
## $ UPPS_P_11            <dbl> 1, 1, 2, 4, 4, 2, 3, 1, 2, 2, 2, 1, 1, 1, 1, 3, 4…
## $ UPPS_P_12            <dbl> 3, 1, 2, 4, 3, 2, 1, 2, 2, 1, 4, 2, 1, 2, 3, 3, 3…
## $ UPPS_P_13            <dbl> 3, 3, 1, 2, 3, 3, 1, 3, 2, 1, 4, 3, 1, 4, 4, 4, 4…
## $ UPPS_P_14            <dbl> 2, 2, 2, 2, 1, 3, 2, 1, 2, 1, 1, 1, 3, 1, 4, 1, 2…
## $ UPPS_P_15            <dbl> 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1, 3, 1, 1…
## $ UPPS_P_16            <dbl> 2, 1, 1, 3, 1, 2, 1, 2, 3, 1, 2, 2, 2, 1, 3, 2, 3…
## $ UPPS_P_17            <dbl> 3, 1, 1, 3, 2, 3, 1, 1, 2, 4, 2, 1, 1, 2, 4, 3, 1…
## $ UPPS_P_18            <dbl> 4, 3, 2, 4, 3, 4, 4, 4, 3, 3, 3, 4, 3, 4, 2, 3, 3…
## $ UPPS_P_19            <dbl> 2, 2, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2…
## $ UPPS_P_20            <dbl> 1, 1, 1, 3, 3, 2, 1, 2, 1, 1, 2, 1, 3, 2, 3, 3, 1…
## $ UPPS_P_21            <dbl> 3, 2, 1, 3, 1, 1, 3, 2, 3, 1, 3, 2, 4, 1, 4, 2, 3…
## $ UPPS_P_22            <dbl> 2, 1, 1, 3, 3, 4, 1, 2, 1, 1, 1, 3, 4, 1, 4, 3, 3…
## $ UPPS_P_23            <dbl> 4, 3, 1, 4, 4, 2, 1, 3, 3, 1, 3, 4, 4, 4, 4, 4, 2…
## $ UPPS_P_24            <dbl> 1, 1, 3, 3, 2, 3, 1, 3, 1, 2, 1, 1, 4, 1, 3, 3, 4…
## $ UPPS_P_25            <dbl> 1, 1, 1, 4, 4, 1, 1, 3, 1, 1, 2, 1, 3, 1, 3, 2, 1…
## $ UPPS_P_26            <dbl> 4, 2, 2, 3, 2, 4, 3, 4, 2, 3, 1, 4, 4, 4, 4, 2, 3…
## $ UPPS_P_27            <dbl> 1, 1, 2, 4, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2…
## $ UPPS_P_28            <dbl> 2, 1, 2, 4, 1, 1, 1, 1, 3, 1, 2, 1, 4, 1, 2, 2, 3…
## $ UPPS_P_29            <dbl> 1, 1, 1, 4, 2, 3, 1, 2, 1, 2, 1, 2, 3, 2, 4, 3, 4…
## $ UPPS_P_30            <dbl> 1, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 3, 1…
## $ UPPS_P_31            <dbl> 3, 3, 2, 2, 3, 4, 2, 2, 4, 2, 3, 4, 4, 4, 4, 3, 2…
## $ UPPS_P_32            <dbl> 2, 1, 3, 1, 3, 3, 2, 2, 2, 1, 2, 1, 4, 1, 1, 2, 2…
## $ UPPS_P_33            <dbl> 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 4, 1, 1, 2, 2…
## $ UPPS_P_34            <dbl> 2, 2, 2, 2, 3, 4, 1, 2, 2, 1, 2, 1, 3, 2, 3, 4, 2…
## $ UPPS_P_35            <dbl> 1, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1…
## $ UPPS_P_36            <dbl> 1, 2, 1, 1, 3, 4, 2, 3, 2, 1, 1, 4, 3, 4, 4, 2, 1…
## $ UPPS_P_37            <dbl> 2, 1, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 4, 1, 2, 1, 3…
## $ UPPS_P_38            <dbl> 2, 2, 1, 3, 2, 2, 1, 2, 4, 1, 3, 3, 4, 3, 2, 2, 3…
## $ UPPS_P_39            <dbl> 2, 1, 2, 4, 4, 2, 2, 3, 2, 2, 2, 2, 4, 2, 2, 3, 4…
## $ UPPS_P_40            <dbl> 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1…
## $ UPPS_P_41            <dbl> 3, 3, 1, 4, 3, 4, 1, 2, 3, 1, 3, 4, 4, 4, 2, 3, 4…
## $ UPPS_P_42            <dbl> 1, 2, 2, 3, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2…
## $ UPPS_P_43            <dbl> 2, 2, 1, 2, 2, 2, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2…
## $ UPPS_P_44            <dbl> 1, 1, 1, 4, 2, 3, 1, 2, 1, 1, 1, 1, 2, 2, 2, 4, 3…
## $ UPPS_P_45            <dbl> 3, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 1…
## $ UPPS_P_46            <dbl> 3, 4, 2, 1, 3, 4, 4, 4, 3, 3, 3, 4, 4, 4, 4, 3, 1…
## $ UPPS_P_47            <dbl> 2, 2, 4, 3, 3, 3, 2, 3, 3, 3, 4, 1, 4, 2, 3, 3, 3…
## $ UPPS_P_48            <dbl> 2, 1, 1, 3, 2, 1, 1, 2, 2, 1, 3, 2, 3, 1, 3, 2, 3…
## $ UPPS_P_49            <dbl> 1, 1, 1, 4, 2, 1, 1, 2, 1, 1, 1, 1, 4, 1, 3, 3, 1…
## $ UPPS_P_50            <dbl> 1, 2, 2, 2, 4, 4, 1, 2, 1, 1, 2, 3, 3, 3, 4, 2, 4…
## $ UPPS_P_51            <dbl> 3, 4, 3, 1, 4, 4, 4, 4, 4, 3, 1, 4, 4, 4, 1, 3, 4…
## $ UPPS_P_52            <dbl> 2, 1, 1, 2, 4, 1, 1, 2, 1, 1, 1, 1, 4, 1, 2, 2, 1…
## $ UPPS_P_53            <dbl> 2, 2, 2, 2, 3, 3, 3, 2, 2, 1, 2, 2, 4, 1, 4, 2, 3…
## $ UPPS_P_54            <dbl> 2, 1, 1, 4, 2, 2, 1, 2, 1, 1, 2, 1, 3, 1, 4, 3, 1…
## $ UPPS_P_55            <dbl> 2, 2, 2, 3, 2, 2, 1, 2, 3, 1, 2, 2, 2, 1, 3, 2, 2…
## $ UPPS_P_56            <dbl> 4, 3, 2, 4, 4, 4, 3, 4, 2, 1, 3, 4, 3, 4, 4, 2, 2…
## $ UPPS_P_57            <dbl> 2, 3, 3, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 2, 3, 2…
## $ UPPS_P_58            <dbl> 3, 1, 1, 2, 3, 3, 1, 2, 1, 2, 2, 3, 4, 2, 3, 3, 3…
## $ UPPS_P_59            <dbl> 2, 1, 1, 4, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 4, 3, 3…
## $ NU_Avg               <dbl> 2.083333, 1.250000, 1.666667, 2.833333, 2.833333,…
## $ PU_Avg               <dbl> 1.500000, 1.142857, 1.142857, 3.000000, 2.428571,…
## $ SS_Avg               <dbl> 3.250000, 2.916667, 1.583333, 2.750000, 3.166667,…
## $ LoPM_Avg             <dbl> 2.000000, 1.545455, 1.363636, 2.818182, 1.818182,…
## $ LoPER_Avg            <dbl> 1.5, 1.4, 2.4, 2.3, 2.0, 2.1, 1.4, 1.6, 1.5, 1.4,…

10) Correlations Amongst Measures

#Correlation chart for the urgency averages, alcohol sum scores, and the two valence ratings
corr_vars <- select(Full_df, NU_Avg, PU_Avg, MINI_AUD_Sum, AUDIT_Sum, AG1_Valence, AG2_Valence)
PerformanceAnalytics::chart.Correlation(corr_vars)

11) Writing data to csv

Code below is hashed out to prevent continual re-writing of the csv upon markdown publication.

#Full_df %>% write_csv("/Users/noahwolkowicz/Desktop/CT/West Haven/Postdoc/Postdoc Research/Jenn & Noah Collab/Data/JN_Collab_6.16.22.csv")

JN_R_Markdown

Noah R. Wolkowicz

6/16/2022

0) Intro Notes:

1) Packages & R Markdown Setup

2) Data Read-In

Dimensions of Initial Data Set

3) Inattentive Responders

4) People Who are Missing (have “NA” for) Condition Variable

So what’s going on with this one person who’s missing their condition variable

but appears to have completed the study?

5) Demographics

Demographics Missingness & Table

Demographic Statistics

6) Substance Use

Substance Use Missingness & Table

7) Mood Induction

Mood Induction Effectiveness Across Conditions

Paired T-tests of mood valence pre-post induction

8) UPPS-P: Negative Urgency & Positive Urgency Info

UPPS-P Dataframe

Measure Missingness, Means, & SDs

9) Joining Dataframes

10) Correlations Amongst Measures

11) Writing data to csv

Code below is hashed out to prevent continual re-writing of the csv upon markdown publication.