rm(list=ls())
Load packages
# load packages
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.3 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readxl)
library(dplyr)
library(doBy)
##
## Attaching package: 'doBy'
## The following object is masked from 'package:dplyr':
##
## order_by
library(lmerTest)
## Loading required package: lme4
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(sjPlot)
## Registered S3 methods overwritten by 'parameters':
## method from
## as.double.parameters_kurtosis datawizard
## as.double.parameters_skewness datawizard
## as.double.parameters_smoothness datawizard
## as.numeric.parameters_kurtosis datawizard
## as.numeric.parameters_skewness datawizard
## as.numeric.parameters_smoothness datawizard
## print.parameters_distribution datawizard
## print.parameters_kurtosis datawizard
## print.parameters_skewness datawizard
## summary.parameters_kurtosis datawizard
## summary.parameters_skewness datawizard
## Learn more about sjPlot with 'browseVignettes("sjPlot")'.
Read in REDCAP data
# load wave 2 data
wave2 <- read.csv("/Users/caitlinbrown/Box/LifeSense/Conversation\ Sentiment/wave2_redcap.csv",
header = T, na.strings = c("", "99", "999"))
# load wave 3 data
wave3 <- read.csv("/Users/caitlinbrown/Box/LifeSense/Conversation\ Sentiment/wave3_redcap.csv",
header = T, na.strings = c("", "99", "999"))
# make demo_household_num compatible types so we can merge later
wave2$demo_household_num <- as.integer(wave2$demo_household_num)
wave3$demo_household_num <- as.integer(wave3$demo_household_num)
## Warning: NAs introduced by coercion
[Warning - big files] Read in Sentiment data
sentiment2 <- read.csv("/Users/caitlinbrown/Box/LifeSense/Conversation\ Sentiment/wave2_msg_sentiment.csv", header = T, na.strings = c("", "99", "999"))
sentiment3 <- read.csv("/Users/caitlinbrown/Box/LifeSense/Conversation\ Sentiment/wave3_msg_sentiment.csv", header = T, na.strings = c("", "99", "999"))
Read in PHQ data
phq2 <- read.csv("/Users/caitlinbrown/Box/LifeSense/Conversation\ Sentiment/wave2_phq.csv", header = T, na.strings = c("", "99", "999"))
phq3 <- read.csv("/Users/caitlinbrown/Box/LifeSense/Conversation\ Sentiment/wave3_phq.csv", header = T, na.strings = c("", "99", "999"))
Prep to merge PHQ with other REDCAP data
# rename columns for clarity - wave 2
wave2 <- rename(wave2, week = study_idx)
phq2 <- rename(phq2, week = study_wk)
phq2$pid <- as.integer(phq2$pid)
wave2$pid <- as.integer(wave2$pid)
phq2 <- phq2 %>% select(pid, phq_tot, week)
phq2 <- phq2 %>% drop_na(week)
# rename columns for clarity - wave 3
wave3 <- rename(wave3, week = study_idx)
phq3 <- rename(phq3, week = study_wk)
phq3$pid <- as.integer(phq3$pid)
wave3$pid <- as.integer(wave3$pid)
phq3 <- phq3 %>% select(pid, phq_tot, week)
phq3 <- phq3 %>% drop_na(week)
Compute mean PHQ per subject per week
# also in preparation for merging PHQ and other data, we need to compute the mean PHQ per subject per week
phq2avg <- phq2 %>% group_by (pid, week) %>% summarise(phq_mean = mean(phq_tot))
## `summarise()` has grouped output by 'pid'. You can override using the `.groups` argument.
phq3avg <- phq3 %>% group_by (pid, week) %>% summarise(phq_mean = mean(phq_tot))
## `summarise()` has grouped output by 'pid'. You can override using the `.groups` argument.
Merge PHQ with REDCAP data
# Week 2
phq2avg$week <- as.integer(phq2avg$week)
wave2$week <- as.integer(wave2$week)
wave2 <- left_join(wave2, phq2avg)
## Joining, by = c("pid", "week")
# Week 3
phq3avg$week <- as.integer(phq3avg$week)
wave3$week <- as.integer(wave3$week)
wave3 <- left_join(wave3, phq3avg)
## Joining, by = c("pid", "week")
To prep for cleaning the sentiment date data, we need to read in lookup file to identify which wave each sub was in
# create a Wave 2 group(s) lookup file for joining with our text message data
# group 1
wave2grp1 <- read_excel("~/Box/LifeSense/LSWave2/LS_Wave2_REDCap_FPGgrp1_wk16_06-08-20.xlsx", sheet = "ID_map_withdraw_enroll_grp ")
wave2grp1$pid <- as.integer(wave2grp1$app_id)
wave2grp1$grp <- 1
wave2grp1 <- wave2grp1 %>% select(c(pid, grp))
# group 2
wave2grp2 <- read_excel("~/Box/LifeSense/LSWave2/LS_Wave2_REDCap_FPGgrp2_120820.xlsx", sheet = "ID_map_withdraw_enroll_grp ")
wave2grp2$pid <- as.integer(wave2grp2$app_id)
wave2grp2$grp <- 2
wave2grp2 <- wave2grp2 %>% select(c(pid, grp))
# merge groups 1 and 2
wave2grp <- rbind(wave2grp1, wave2grp2)
# left join Wave 2 group number with text message data on pid
sentiment2 <- left_join(sentiment2, wave2grp)
## Joining, by = "pid"
# checking join
na_check2 = sentiment2[is.na(sentiment2$grp), ]
# split sentiment2 file into 2 based on grp
sentiment2grp1 <- sentiment2 %>% filter(grp == 1)
sentiment2grp2 <- sentiment2 %>% filter(grp == 2)
#---
# create a Wave 3 group(s) lookup file for joining with our text message data
wave3grp <- read_excel("~/Box/LifeSense/LSWave3/LS_Wave3_REDCap_grp1_grp2_061121.xlsx", sheet = "ID_map_withdraw_enroll_grp")
wave3grp$pid <- as.integer(wave3grp$app_id)
wave3grp$grp <- ifelse(wave3grp$enrollment_grp == "Wave 3_Android_Social_Media_Registry_Group_1", 1,
ifelse(wave3grp$enrollment_grp == "Wave 3_Android_Social_Media_Registry_Group_2", 2, NA))
wave3grp <- wave3grp %>% select(c(pid, grp))
# left join Wave 3 group number with text message data on pid
sentiment3 <- left_join(sentiment3, wave3grp)
## Joining, by = "pid"
# checking join
na_check3 = sentiment3[is.na(sentiment3$grp), ]
# split sentiment3 file into 2 based on grp
sentiment3grp1 <- sentiment3 %>% filter(grp == 1)
sentiment3grp2 <- sentiment3 %>% filter(grp == 2)
Create week variables based on dates in sentiment data
# Create week variables for Wave 2
sentiment2grp1$date <- as.Date(sentiment2grp1$date)
sentiment2grp1$week <- ifelse(sentiment2grp1$date < "2020-02-04", NA,
ifelse(sentiment2grp1$date < "2020-02-11", 0, ifelse(sentiment2grp1$date < "2020-02-18", 1,
ifelse(sentiment2grp1$date < "2020-02-25", 2, ifelse(sentiment2grp1$date < "2020-03-03", 3,
ifelse(sentiment2grp1$date < "2020-03-10", 4, ifelse(sentiment2grp1$date < "2020-03-17", 5,
ifelse(sentiment2grp1$date < "2020-03-24", 6, ifelse(sentiment2grp1$date < "2020-03-31", 7,
ifelse(sentiment2grp1$date < "2020-04-07", 8, ifelse(sentiment2grp1$date < "2020-04-14", 9,
ifelse(sentiment2grp1$date < "2020-04-21", 10, ifelse(sentiment2grp1$date < "2020-04-28", 11,
ifelse(sentiment2grp1$date < "2020-05-05", 12, ifelse(sentiment2grp1$date < "2020-05-12", 13,
ifelse(sentiment2grp1$date < "2020-05-19", 14, ifelse(sentiment2grp1$date < "2020-05-26", 15,
ifelse(sentiment2grp1$date < "2020-06-01", 16,
NA))))))))))))))))))
sentiment2grp2$date <- as.Date(sentiment2grp2$date)
sentiment2grp2$week <- ifelse(sentiment2grp2$date < "2020-04-06", NA, ifelse(sentiment2grp2$date < "2020-04-14", 0,
ifelse(sentiment2grp2$date < "2020-04-21", 1, ifelse(sentiment2grp2$date < "2020-04-28", 2,
ifelse(sentiment2grp2$date < "2020-05-05", 3, ifelse(sentiment2grp2$date < "2020-05-12", 4,
ifelse(sentiment2grp2$date < "2020-05-19", 5, ifelse(sentiment2grp2$date < "2020-05-26", 6,
ifelse(sentiment2grp2$date < "2020-06-02", 7, ifelse(sentiment2grp2$date < "2020-06-09", 8,
ifelse(sentiment2grp2$date < "2020-06-16", 9, ifelse(sentiment2grp2$date < "2020-06-23", 10,
ifelse(sentiment2grp2$date < "2020-06-30", 11, ifelse(sentiment2grp2$date < "2020-07-07", 12,
ifelse(sentiment2grp2$date < "2020-07-14", 13, ifelse(sentiment2grp2$date < "2020-07-21", 14,
ifelse(sentiment2grp2$date < "2020-07-28", 15, ifelse(sentiment2grp2$date < "2020-08-04", 16,
NA))))))))))))))))))
sentiment2 <- rbind(sentiment2grp1, sentiment2grp2)
# Create week variables for Wave 3
sentiment3grp1$date <- as.Date(sentiment3grp1$date)
sentiment3grp1$week <- ifelse(sentiment3grp1$date < "2021-01-11", NA,
ifelse(sentiment3grp1$date < "2021-01-19", 0, ifelse(sentiment3grp1$date < "2021-01-26", 1,
ifelse(sentiment3grp1$date < "2021-02-02", 2, ifelse(sentiment3grp1$date < "2021-02-09", 3,
ifelse(sentiment3grp1$date < "2021-02-16", 4, ifelse(sentiment3grp1$date < "2021-02-23", 5,
ifelse(sentiment3grp1$date < "2021-03-02", 6, ifelse(sentiment3grp1$date < "2021-03-09", 7,
ifelse(sentiment3grp1$date < "2021-03-16", 8, ifelse(sentiment3grp1$date < "2021-03-23", 9,
ifelse(sentiment3grp1$date < "2021-03-30", 10, ifelse(sentiment3grp1$date < "2021-04-06", 11,
ifelse(sentiment3grp1$date < "2021-04-13", 12, ifelse(sentiment3grp1$date < "2021-04-20", 13,
ifelse(sentiment3grp1$date < "2021-04-27", 14, ifelse(sentiment3grp1$date < "2021-05-04", 15,
ifelse(sentiment3grp1$date < "2021-05-11", 16,
NA))))))))))))))))))
sentiment3grp2$date <- as.Date(sentiment3grp2$date)
sentiment3grp2$week <- ifelse(sentiment3grp2$date < "2021-02-01", NA, ifelse(sentiment3grp2$date < "2021-02-09", 0,
ifelse(sentiment3grp2$date < "2021-02-16", 1, ifelse(sentiment3grp2$date < "2021-02-23", 2,
ifelse(sentiment3grp2$date < "2021-03-02", 3, ifelse(sentiment3grp2$date < "2021-03-09", 4,
ifelse(sentiment3grp2$date < "2021-03-16", 5, ifelse(sentiment3grp2$date < "2021-03-23", 6,
ifelse(sentiment3grp2$date < "2021-03-30", 7, ifelse(sentiment3grp2$date < "2021-04-06", 8,
ifelse(sentiment3grp2$date < "2021-04-13", 9, ifelse(sentiment3grp2$date < "2021-04-20", 10,
ifelse(sentiment3grp2$date < "2021-04-27", 11, ifelse(sentiment3grp2$date < "2021-05-04", 12,
ifelse(sentiment3grp2$date < "2021-05-11", 13, ifelse(sentiment3grp2$date < "2021-05-18", 14,
ifelse(sentiment3grp2$date < "2021-05-25", 15, ifelse(sentiment3grp2$date < "2021-06-01", 16,
NA))))))))))))))))))
sentiment3 <- rbind(sentiment3grp1, sentiment3grp2)
Collapse sentiment data across 2 week intervals within person
# Create aggregated week variables for Wave 2 - 2 week intervals
#x1 2 3 - 4
#x4 5 6 - 7
#x7 8 9 - 10
#x10 11 12 - 13
#x13 14 15 - 16
# note that below we're recoding such that weeks 2 and 3 are labeled 4, which will allow us to merge with the week 4 symptom data and look at lagged associations (same thing for weeks 5 and 6 with symptom data at week 7, etc. etc.)
sentiment2$weekagg <- ifelse(sentiment2$week == 0, NA, ifelse(sentiment2$week == 2, 4, ifelse(sentiment2$week == 3, 4,
ifelse(sentiment2$week == 5, 7, ifelse(sentiment2$week == 6, 7,
ifelse(sentiment2$week == 8, 10, ifelse(sentiment2$week == 9, 10,
ifelse(sentiment2$week == 11, 13, ifelse(sentiment2$week == 12, 13,
ifelse(sentiment2$week == 14, 16, ifelse(sentiment2$week == 15, 16, NA)))))))))))
# filter out people by week if less than 50 messages in 2 week period
sentiment2 <- sentiment2 %>% drop_na(weekagg)
sentiment2 <- sentiment2 %>% group_by(pid, weekagg, direction) %>% filter(n()>49)
# collapse sentiment data over 2 week periods - divide the total score per category by the total length
sentiment2tot <- sentiment2 %>% group_by(pid, weekagg, direction) %>%
filter(!is.na("length")) %>%
summarise_at(vars(contains(c("LIWC2015", "NRC", "stress_stress", "depression_", "pronouns_", "length"))),
.funs = list(tot = sum), na.rm=T)
sentiment2avg <- sentiment2tot %>%
mutate_at(vars(contains(c("LIWC2015", "NRC", "stress_stress", "depression_", "pronouns_"))),
.funs = list(avg = ~.x/length_tot), na.rm=T)
#----
# Create aggregated week variables for Wave 3 - 2 week intervals
sentiment3$weekagg <- ifelse(sentiment3$week == 0, NA, ifelse(sentiment3$week == 2, 4, ifelse(sentiment3$week == 3, 4,
ifelse(sentiment3$week == 5, 7, ifelse(sentiment3$week == 6, 7,
ifelse(sentiment3$week == 8, 10, ifelse(sentiment3$week == 9, 10,
ifelse(sentiment3$week == 11, 13, ifelse(sentiment3$week == 12, 13,
ifelse(sentiment3$week == 14, 16, ifelse(sentiment3$week == 15, 16, NA)))))))))))
# filter out people by week if less than 50 messages in 2 week period
sentiment3 <- sentiment3 %>% drop_na(weekagg)
sentiment3 <- sentiment3 %>% group_by (pid, weekagg, direction) %>% filter(n()>49)
# collapse sentiment data over 2 week periods - divide the total score per category by the total length
sentiment3tot <- sentiment3 %>% group_by(pid, weekagg, direction) %>%
filter(!is.na("length")) %>%
summarise_at(vars(contains(c("LIWC2015", "NRC", "stress_stress", "depression_", "pronouns_", "length"))),
.funs = list(tot = sum), na.rm=T)
sentiment3avg <- sentiment3tot %>%
mutate_at(vars(contains(c("LIWC2015", "NRC", "stress_stress", "depression_", "pronouns_"))),
.funs = list(avg = ~.x/length_tot), na.rm=T)
Merge symptom and sentiment data
# prep to select only data from weeks 4, 7, 10, 13, 16
weekstarget <- c(4, 7, 10, 13, 16)
# select key variables from wave 2
wave2sm <- wave2 %>% select(pid, age, demo_race, demo_gender, demo_hispanic, demo_household_num, tot_spin, tot_gad, phq_mean, week)
# make demographics long format
wave2sm <- wave2sm %>%
group_by(pid) %>%
mutate(Age=age[week==0],
Gender=demo_gender[week==0],
Race=demo_race[week==0],
Hispanic=demo_hispanic[week==0],
Income=demo_household_num[week==0]) %>%
ungroup()
wave2sm <- as.data.frame(wave2sm)
# re-filter to select key variables
wave2sm <- wave2sm %>% select(pid, Age, Race, Gender, Hispanic, Income, tot_spin, tot_gad, phq_mean, week)
# filter to include only necessary weeks
wave2sm <- wave2sm %>% filter(week %in% weekstarget)
# select key variables from wave 3
wave3sm <- wave3 %>% select(pid, age, demo_race, demo_gender, demo_hispanic, demo_household_num, tot_spin, tot_gad, phq_mean, week)
# make demographics long format
wave3sm <- wave3sm %>%
group_by(pid) %>%
mutate(Age=age[week==0],
Gender=demo_gender[week==0],
Race=demo_race[week==0],
Hispanic=demo_hispanic[week==0],
Income=demo_household_num[week==0]) %>%
ungroup()
wave3sm <- as.data.frame(wave3sm)
# re-filter to select key variables
wave3sm <- wave3sm %>% select(pid, Age, Race, Gender, Hispanic, Income, tot_spin, tot_gad, phq_mean, week)
# filter to include only necessary weeks
wave3sm <- wave3sm %>% filter(week %in% weekstarget)
# Wave 2 - replace week variable with weekagg so we can merge with sentiment data
wave2sm <- rename(wave2sm, weekagg = week)
# Wave 3 - replace week variable with weekagg so we can merge with sentiment data
wave3sm <- rename(wave3sm, weekagg = week)
# Merge symptom and sentiment
wave2merge <- right_join(wave2sm, sentiment2avg)
## Joining, by = c("pid", "weekagg")
wave3merge <- right_join(wave3sm, sentiment3avg) # note that this doesn't pull in demographics bc of weeks included - to fix
## Joining, by = c("pid", "weekagg")
# Merge wave 2 and wave 3 data
wave2wave3 <- rbind(wave2merge, wave3merge)
# remove unnecessary columns
wave2wave3 <- wave2wave3 %>% select(c(pid, Age, Race, Gender, Hispanic, Income, tot_spin,
tot_gad, phq_mean, weekagg, direction, length_tot, contains("tot_avg"))) %>%
rename_at(.vars = vars(ends_with("_tot_avg")), .funs = funs(sub("_tot_avg", "_avg", .)))
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
# select outgoing only
wave2wave3out <- wave2wave3 %>% filter(direction=="outgoing")
wave2wave3in <- wave2wave3 %>% filter(direction=="incoming")
# centering - example for ANGER variable that we can use as a check for loops
d <- wave2wave3out %>%
# Grand mean centering (CMC)
mutate(LIWC2015_ANGER_avg.gmc = LIWC2015_ANGER_avg-mean(LIWC2015_ANGER_avg, na.rm=T)) %>%
# Person mean centering (more generally, centering within cluster)
group_by(pid) %>%
mutate(LIWC2015_ANGER_avg.cm = mean(LIWC2015_ANGER_avg.gmc, na.rm=T),
LIWC2015_ANGER_avg.cwc = LIWC2015_ANGER_avg-LIWC2015_ANGER_avg.cm) %>%
ungroup %>%
# Grand mean centering of the aggregated variable
mutate(LIWC2015_ANGER_avg.cmc = LIWC2015_ANGER_avg.cm-mean(LIWC2015_ANGER_avg.cm, na.rm=T))
# we will include cwc (deviations per person - within effect) and cmc (aggregated by person - between effect) in MLMs
# centering
wave2wave3out <- bind_cols(wave2wave3out,
wave2wave3out %>%
transmute(across(contains("_avg"), ~ . - mean(., na.rm=T))) %>%
rename_all(~ paste0(., "_gmc")))
wave2wave3out <- bind_cols(wave2wave3out,
wave2wave3out %>%
group_by(pid) %>%
transmute(across(contains("_gmc"), ~ mean(., na.rm=T))) %>%
rename_all(~ paste0(., "_cm")))
wave2wave3out <- bind_cols(wave2wave3out,
wave2wave3out %>%
group_by(pid) %>%
transmute(across(ends_with("_avg")) - across(ends_with("_avg_gmc_cm"))) %>%
rename_all(~ paste0(., "_cwc")))
wave2wave3out <- bind_cols(wave2wave3out,
wave2wave3out %>%
transmute(across(contains("avg_gmc_cm"), ~ . - mean(., na.rm=T))) %>%
rename_all(~ paste0(., "_cmc")))
# quality check - ensure these are the same
d %>%
select(LIWC2015_ANGER_avg, LIWC2015_ANGER_avg.gmc, LIWC2015_ANGER_avg.cm,
LIWC2015_ANGER_avg.cwc, LIWC2015_ANGER_avg.cmc) %>%
summary
## LIWC2015_ANGER_avg LIWC2015_ANGER_avg.gmc LIWC2015_ANGER_avg.cm
## Min. :0.0000000 Min. :-0.0004502 Min. :-4.502e-04
## 1st Qu.:0.0001132 1st Qu.:-0.0003370 1st Qu.:-2.565e-04
## Median :0.0003280 Median :-0.0001223 Median :-9.764e-05
## Mean :0.0004502 Mean : 0.0000000 Mean : 0.000e+00
## 3rd Qu.:0.0006112 3rd Qu.: 0.0001609 3rd Qu.: 1.335e-04
## Max. :0.0110331 Max. : 0.0105829 Max. : 8.450e-03
## LIWC2015_ANGER_avg.cwc LIWC2015_ANGER_avg.cmc
## Min. :-0.0016822 Min. :-4.502e-04
## 1st Qu.: 0.0003119 1st Qu.:-2.565e-04
## Median : 0.0004468 Median :-9.764e-05
## Mean : 0.0004502 Mean : 0.000e+00
## 3rd Qu.: 0.0005745 3rd Qu.: 1.335e-04
## Max. : 0.0027987 Max. : 8.450e-03
wave2wave3out %>%
select(LIWC2015_ANGER_avg, LIWC2015_ANGER_avg_gmc, LIWC2015_ANGER_avg_gmc_cm,
LIWC2015_ANGER_avg_cwc, LIWC2015_ANGER_avg_gmc_cm_cmc) %>%
summary
## LIWC2015_ANGER_avg LIWC2015_ANGER_avg_gmc LIWC2015_ANGER_avg_gmc_cm
## Min. :0.0000000 Min. :-0.0004502 Min. :-4.502e-04
## 1st Qu.:0.0001132 1st Qu.:-0.0003370 1st Qu.:-2.565e-04
## Median :0.0003280 Median :-0.0001223 Median :-9.764e-05
## Mean :0.0004502 Mean : 0.0000000 Mean : 0.000e+00
## 3rd Qu.:0.0006112 3rd Qu.: 0.0001609 3rd Qu.: 1.335e-04
## Max. :0.0110331 Max. : 0.0105829 Max. : 8.450e-03
## LIWC2015_ANGER_avg_cwc LIWC2015_ANGER_avg_gmc_cm_cmc
## Min. :-0.0016822 Min. :-4.502e-04
## 1st Qu.: 0.0003119 1st Qu.:-2.565e-04
## Median : 0.0004468 Median :-9.764e-05
## Mean : 0.0004502 Mean : 0.000e+00
## 3rd Qu.: 0.0005745 3rd Qu.: 1.335e-04
## Max. : 0.0027987 Max. : 8.450e-03
Preliminary tests of multilevel regression models - outgoing only
# we will include cwc (deviations per person - within effect) and cmc (aggregated by person - between effect) in MLMs
# rename cmc columns
wave2wave3out <- wave2wave3out %>%
rename_at(.vars = vars(contains("_cmc")), .funs = funs(sub("gmc_cm_cmc", "cmc", .)))
wave2wave3out <- as.data.frame(wave2wave3out)
# rescale relevant variables
varstoscale = names(wave2wave3out %>% select(c(tot_spin, tot_gad, phq_mean, Age, ends_with("cmc"), ends_with("cwc"), -contains("NRC_Emolex_Hash_category"))))
wave2wave3outscaled <- wave2wave3out %>% mutate_at(varstoscale, ~scale(.))
# make sure Gender is a factor
wave2wave3outscaled$Gender <- as.factor(wave2wave3outscaled$Gender)
# select within person columns
vars_cwc = names(wave2wave3outscaled %>% select(contains("_cwc") & !contains("pid") & !contains("NRC_Emolex_Hash_category")))
# select between person columns
vars_cmc = names(wave2wave3outscaled %>% select(contains("_cmc") & !contains("NRC_Emolex_Hash_category")))
# center week variable at 0
wave2wave3outscaled$weekagg <- wave2wave3outscaled$weekagg - 9.853602
# loop through models - SAD
sadmods <- list()
for (i in 1:length(vars_cwc)) {
f <- formula(paste("tot_spin~(1|pid)+ weekagg + Age + Gender + phq_mean + tot_gad + ", vars_cwc[i], "+", vars_cmc[i]))
sadmods[[i]] <- lmer(f, data=wave2wave3outscaled)
}
# create dataframe to store output
sadoutput <- summary(sadmods[[1]])$coefficients
# loop through to extract coefficients for each model and bind with output
for (i in 2:87) {
a <- summary(sadmods[[i]])$coefficients
sadoutput <- rbind(sadoutput, a)
}
# loop through models - GAD
gadmods <- list()
for (i in 1:length(vars_cwc)) {
f <- formula(paste("tot_gad~(1|pid)+ weekagg + Age + Gender + phq_mean + tot_spin + ", vars_cwc[i], "+", vars_cmc[i]))
gadmods[[i]] <- lmer(f, data=wave2wave3outscaled)
}
# create dataframe to store output
gadoutput <- summary(gadmods[[1]])$coefficients
# loop through to extract coefficients for each model and bind with output
for (i in 2:87) {
a <- summary(gadmods[[i]])$coefficients
gadoutput <- rbind(gadoutput, a)
}
# loop through models - PHQ
phqmods <- list()
for (i in 1:length(vars_cwc)) {
f <- formula(paste("phq_mean~(1|pid)+ weekagg + Age + Gender + tot_gad + tot_spin + ", vars_cwc[i], "+", vars_cmc[i]))
phqmods[[i]] <- lmer(f, data=wave2wave3outscaled)
}
# create dataframe to store output
phqoutput <- summary(phqmods[[1]])$coefficients
# loop through to extract coefficients for each model and bind with output
for (i in 2:87) {
a <- summary(phqmods[[i]])$coefficients
phqoutput <- rbind(phqoutput, a)
}
#write.csv(sadoutput, "SPINResultsStdRev.csv")
#write.csv(gadoutput, "GADResultsStdRev.csv")
#write.csv(phqoutput, "PHQResultsStdRev.csv")