Setup

1. Libraries

library(psych)
library(psycho)
library(lme4)
library(lmerTest)
library(nlme)
library(sjstats)
library(dplyr)
library(foreign)
library(sjPlot)
library(dplyr)
library(tidyr)
library(plyr)
library(ggplot2)
library(gridExtra)
library(EMAeval)
library(knitr)
library(arsenal)
library(corrplot)
library(magrittr)
library(kableExtra)
library(ggcorrplot)

2. Dataframes

# Trait-level questionnaire data (measures of Sx and Cog risk etc.), wide format.
trait <- read.csv(
  "/Users/nikki/Desktop/Research/HealthyU_Scanning_Local/Emily_HUresources/WRG Survey/HealthyU_Questionnaires_Masterdata_Wide.csv",
  header = TRUE
)
# Discard the first column of the questionnaire file.
trait <- trait[-1]
#trait <- trait[c(2,9:11,13:32)]

# EMA and GPS variables

# `ema` is the raw input consumed by the EMA data-preparation step (step 3).
ema <- read.csv(
  "/Users/nikki/Desktop/Research/Masters_reanalysis/Data/HealthyU_EMA_andGPS_onlyEMAdays.csv"
)
# `ema_wi` and `ema_bw` are read from the same paths that the commented-out
# write.csv() calls in step 4 use for `ema_items` and `ema_means`.
ema_wi <- read.csv(
  "/Users/nikki/Desktop/Research/EMA/HealthyU_EMA_andGPS_onlyEMAdays_AllData.csv"
)
  #ema_wi <- ema_wi[-c(1)]
ema_bw <- read.csv(
  "/Users/nikki/Desktop/Research/EMA/HealthyU_EMA_andGPS_onlyEMAdays_BWsubjectsData.csv"
)
  #ema_bw <- ema_bw[-c(1)]
# Attach the subject-level columns to every row of the first file, matching
# `subid` against `Subject` (merge() default: only matching rows are kept).
allema <- merge(x = ema_wi, y = ema_bw, by.x = "subid", by.y = "Subject")

3. EMA data preparation (Skip this step now)

colnames(ema)
# Total stress per row: plain sum of the six stress-domain ratings
# (the result is NA whenever any one of the six ratings is NA).
ema$allstress <- with(
  ema,
  LivingStress + FamilyStress + AcademicStress + PeerStress + JobStress + OtherStress
)

ema_items <- ema

## careless responders

# SD_cutoff() comes from EMAeval; only its `Index_of_Flagged_Assessment`
# element is used, as row positions into `ema_items`.
# NOTE(review): presumably flags low-variability responses across the listed
# items -- confirm against the EMAeval documentation.
flagged <- SD_cutoff(
  ema_items,
  item.colnames = c(
    "anxious", "attentive", "content", "excited", "happy", "irritable",
    "nervous", "upset", "well_sleep", "relaxed"
  ),
  ID.colname = "subid"
)
flagged <- flagged$Index_of_Flagged_Assessment

# careless = 1 for flagged rows, 0 otherwise.
ema_items$careless <- 0
ema_items$careless[flagged] <- 1

# Keep a full copy (flag included) before dropping the flagged rows.
ema_inclCareless <- ema_items
ema_items <- ema_items[which(ema_items$careless == 0), , drop = FALSE]

#ema_items <- ema[c(2,41,42,47,35:40,9:20,33,34)]

    # Number of rows (responses) per subject, as a two-column data frame
    # (numResp, subid) whose row names are the subject ids.
    # NOTE(review): counts are taken from `ema`, not from the careless-filtered
    # `ema_items` that the per-subject sums are computed from -- confirm that
    # this is the intended denominator.
    sublist <- split(ema, f = ema$subid)
    #sublist <-lapply(sublist, function(sublist) cbind(sublist, "StartDay"= sublist[1,3]))
    nobs <- vapply(sublist, nrow, integer(1))
    nobs <- data.frame(
      numResp = unname(nobs),
      subid = names(nobs),
      row.names = names(nobs),
      stringsAsFactors = FALSE
    )
   
 
## Affect
    # Per-subject mean of NA_Mean_new (missing values ignored).
    grpmeans <- aggregate(
      ema_items$NA_Mean_new,
      by = list(ema_items$subid),
      FUN = mean,
      na.rm = TRUE
    )
    print(grpmeans) # inspect the per-subject means
    colnames(grpmeans) <- c("Subject", "ema_avg_na")

    # Per-subject mean of PA_Mean_new (missing values ignored).
    grpmeans_pa <- aggregate(
      ema_items$PA_Mean_new,
      by = list(ema_items$subid),
      FUN = mean,
      na.rm = TRUE
    )
    print(grpmeans_pa) # inspect the per-subject means
    colnames(grpmeans_pa) <- c("Subject", "ema_avg_pa")

    # Start the subject-level table: one row per subject, NA and PA means.
    ema_means <- merge(grpmeans, grpmeans_pa, by = "Subject")

    ## Stress reactivity

    # For one subject's rows, return a one-row data frame holding the
    # correlation between NA_Mean_new and allstress over complete pairs.
    func <- function(ema_items) {
      data.frame(
        ema_stressReact = cor(
          ema_items$NA_Mean_new,
          ema_items$allstress,
          use = "pairwise.complete.obs"
        )
      )
    }

    # Apply per subject (plyr), then left-join so every subject already in
    # ema_means is kept even without a matching correlation row.
    nastress <- ddply(ema_items, .(subid), func)
    ema_means <- merge(
      ema_means, nastress,
      by.x = "Subject", by.y = "subid", all.x = TRUE
    )


## Stress
    # Each step: per-subject mean of one stress rating (NAs ignored), columns
    # renamed to (Subject, <summary name>), then merged into ema_means.

    # Total stress
    grpmeans_str <- setNames(
      aggregate(ema_items$allstress, by = list(ema_items$subid), FUN = mean, na.rm = TRUE),
      c("Subject", "ema_all_str")
    )
    ema_means <- merge(ema_means, grpmeans_str, by = "Subject")

    # Living stress
    grpmeans_str1 <- setNames(
      aggregate(ema_items$LivingStress, by = list(ema_items$subid), FUN = mean, na.rm = TRUE),
      c("Subject", "ema_living_str")
    )
    ema_means <- merge(ema_means, grpmeans_str1, by = "Subject")

    # Family stress
    grpmeans_str2 <- setNames(
      aggregate(ema_items$FamilyStress, by = list(ema_items$subid), FUN = mean, na.rm = TRUE),
      c("Subject", "ema_family_str")
    )
    ema_means <- merge(ema_means, grpmeans_str2, by = "Subject")

    # Academic stress
    grpmeans_str3 <- setNames(
      aggregate(ema_items$AcademicStress, by = list(ema_items$subid), FUN = mean, na.rm = TRUE),
      c("Subject", "ema_acad_str")
    )
    ema_means <- merge(ema_means, grpmeans_str3, by = "Subject")

    # Peer stress
    grpmeans_str4 <- setNames(
      aggregate(ema_items$PeerStress, by = list(ema_items$subid), FUN = mean, na.rm = TRUE),
      c("Subject", "ema_peer_str")
    )
    ema_means <- merge(ema_means, grpmeans_str4, by = "Subject")

    # Job stress
    grpmeans_str5 <- setNames(
      aggregate(ema_items$JobStress, by = list(ema_items$subid), FUN = mean, na.rm = TRUE),
      c("Subject", "ema_job_str")
    )
    ema_means <- merge(ema_means, grpmeans_str5, by = "Subject")

    # Other stress
    grpmeans_str6 <- setNames(
      aggregate(ema_items$OtherStress, by = list(ema_items$subid), FUN = mean, na.rm = TRUE),
      c("Subject", "ema_other_str")
    )
    ema_means <- merge(ema_means, grpmeans_str6, by = "Subject")

## Social

    # Per-subject mean of SocConnect (NAs ignored), merged into ema_means.
    grpmeans <- setNames(
      aggregate(ema_items$SocConnect, by = list(ema_items$subid), FUN = mean, na.rm = TRUE),
      c("Subject", "ema_social_connect")
    )
    ema_means <- merge(ema_means, grpmeans, by = "Subject")

    # Per-subject mean of DesireSocEngagement (NAs ignored), merged likewise.
    grpmeans <- setNames(
      aggregate(ema_items$DesireSocEngagement, by = list(ema_items$subid), FUN = mean, na.rm = TRUE),
      c("Subject", "ema_social_desire")
    )
    ema_means <- merge(ema_means, grpmeans, by = "Subject")

## With Who

    # Each step: per-subject sum of one company column (NAs ignored), renamed
    # to (Subject, <summary name>) and merged into ema_means.

    grpsum <- setNames(
      aggregate(ema_items$Acquaintances, by = list(ema_items$subid), FUN = sum, na.rm = TRUE),
      c("Subject", "ema_withAcquant")
    )
    ema_means <- merge(ema_means, grpsum, by = "Subject")

    grpsum <- setNames(
      aggregate(ema_items$Strangers, by = list(ema_items$subid), FUN = sum, na.rm = TRUE),
      c("Subject", "ema_withStranger")
    )
    ema_means <- merge(ema_means, grpsum, by = "Subject")

    grpsum <- setNames(
      aggregate(ema_items$CloseFriends, by = list(ema_items$subid), FUN = sum, na.rm = TRUE),
      c("Subject", "ema_withCFriend")
    )
    ema_means <- merge(ema_means, grpsum, by = "Subject")

    grpsum <- setNames(
      aggregate(ema_items$RomanticPart, by = list(ema_items$subid), FUN = sum, na.rm = TRUE),
      c("Subject", "ema_withPartner")
    )
    ema_means <- merge(ema_means, grpsum, by = "Subject")

    grpsum <- setNames(
      aggregate(ema_items$Family, by = list(ema_items$subid), FUN = sum, na.rm = TRUE),
      c("Subject", "ema_withFam")
    )
    ema_means <- merge(ema_means, grpsum, by = "Subject")

    grpsum <- setNames(
      aggregate(ema_items$Coworkers, by = list(ema_items$subid), FUN = sum, na.rm = TRUE),
      c("Subject", "ema_withCowork")
    )
    ema_means <- merge(ema_means, grpsum, by = "Subject")

    grpsum <- setNames(
      aggregate(ema_items$Alone, by = list(ema_items$subid), FUN = sum, na.rm = TRUE),
      c("Subject", "ema_withSelf")
    )
    ema_means <- merge(ema_means, grpsum, by = "Subject")

    # Total across the seven company categories.
    ema_means$with_Sum <- with(
      ema_means,
      ema_withAcquant + ema_withStranger + ema_withCFriend + ema_withPartner +
        ema_withFam + ema_withCowork + ema_withSelf
    )

# Attach each subject's response count (numResp).
ema_means <- merge(x = ema_means, y = nobs, by.x = "Subject", by.y = "subid")
    
## Place/Location
# Recode the Location codes 1-7 into labels and store them as plain character.
ema_items$Location <- as.character(
  factor(
    ema_items$Location,
    levels = c(1, 2, 3, 4, 5, 6, 7),
    labels = c("rest_store", "home", "work", "school", "others_home", "gym_exerc", "other")
  )
)
ema_items$LocationSPECIFY <- as.character(ema_items$LocationSPECIFY)
# Location2: the LocationSPECIFY text where Location is "other", else the label.
# NOTE(review): Location2 is not used by the indicators below, which are built
# from Location.
ema_items$Location2 <- ifelse(
  ema_items$Location == "other",
  ema_items$LocationSPECIFY,
  ema_items$Location
)

    # Spread locations wide: one 1/0 indicator column per location label
    # (NA where Location is NA).
    ema_items_loc <- ema_items[c("subid", "Date", "Location")]
    ema_items_loc[["ema_rest_store"]] <- ifelse(ema_items_loc[["Location"]] == "rest_store", 1, 0)
    ema_items_loc[["ema_home"]]       <- ifelse(ema_items_loc[["Location"]] == "home", 1, 0)
    ema_items_loc[["ema_work"]]       <- ifelse(ema_items_loc[["Location"]] == "work", 1, 0)
    ema_items_loc[["ema_school"]]     <- ifelse(ema_items_loc[["Location"]] == "school", 1, 0)
    ema_items_loc[["ema_other_home"]] <- ifelse(ema_items_loc[["Location"]] == "others_home", 1, 0)
    ema_items_loc[["ema_gym_exerc"]]  <- ifelse(ema_items_loc[["Location"]] == "gym_exerc", 1, 0)
    ema_items_loc[["ema_other"]]      <- ifelse(ema_items_loc[["Location"]] == "other", 1, 0)
    # Append the seven indicator columns (positions 4-10) to the row-level data.
    ema_items <- cbind(
      ema_items,
      ema_items_loc[c(
        "ema_rest_store", "ema_home", "ema_work", "ema_school",
        "ema_other_home", "ema_gym_exerc", "ema_other"
      )]
    )

    ## Summing it
    # Each step: per-subject sum of one indicator (NAs ignored), renamed to
    # (Subject, <summary name>) and merged into ema_means.
    grpsum <- setNames(
      aggregate(ema_items$ema_rest_store, by = list(ema_items$subid), FUN = sum, na.rm = TRUE),
      c("Subject", "ema_loc_restStore")
    )
    ema_means <- merge(ema_means, grpsum, by = "Subject")

    grpsum <- setNames(
      aggregate(ema_items$ema_home, by = list(ema_items$subid), FUN = sum, na.rm = TRUE),
      c("Subject", "ema_loc_home")
    )
    ema_means <- merge(ema_means, grpsum, by = "Subject")

    grpsum <- setNames(
      aggregate(ema_items$ema_work, by = list(ema_items$subid), FUN = sum, na.rm = TRUE),
      c("Subject", "ema_loc_work")
    )
    ema_means <- merge(ema_means, grpsum, by = "Subject")

    grpsum <- setNames(
      aggregate(ema_items$ema_school, by = list(ema_items$subid), FUN = sum, na.rm = TRUE),
      c("Subject", "ema_loc_school")
    )
    ema_means <- merge(ema_means, grpsum, by = "Subject")

    grpsum <- setNames(
      aggregate(ema_items$ema_other_home, by = list(ema_items$subid), FUN = sum, na.rm = TRUE),
      c("Subject", "ema_loc_otherHome")
    )
    ema_means <- merge(ema_means, grpsum, by = "Subject")

    grpsum <- setNames(
      aggregate(ema_items$ema_gym_exerc, by = list(ema_items$subid), FUN = sum, na.rm = TRUE),
      c("Subject", "ema_loc_exerc")
    )
    ema_means <- merge(ema_means, grpsum, by = "Subject")

    grpsum <- setNames(
      aggregate(ema_items$ema_other, by = list(ema_items$subid), FUN = sum, na.rm = TRUE),
      c("Subject", "ema_loc_other")
    )
    ema_means <- merge(ema_means, grpsum, by = "Subject")

    # Total across the seven location categories.
    ema_means$loc_Sum <- with(
      ema_means,
      ema_loc_restStore + ema_loc_home + ema_loc_work + ema_loc_school +
        ema_loc_otherHome + ema_loc_exerc + ema_loc_other
    )
  
## Convos

    # Each step: per-subject sum of one conversation column (NAs ignored),
    # renamed to (Subject, <summary name>) and merged into ema_means.

    grpsum <- setNames(
      aggregate(ema_items$F2FConvo, by = list(ema_items$subid), FUN = sum, na.rm = TRUE),
      c("Subject", "ema_liveConvo")
    )
    ema_means <- merge(ema_means, grpsum, by = "Subject")

    grpsum <- setNames(
      aggregate(ema_items$DigitalConvo, by = list(ema_items$subid), FUN = sum, na.rm = TRUE),
      c("Subject", "ema_digitalConvo")
    )
    ema_means <- merge(ema_means, grpsum, by = "Subject")

    grpsum <- setNames(
      aggregate(ema_items$NoConvo, by = list(ema_items$subid), FUN = sum, na.rm = TRUE),
      c("Subject", "ema_noConvo")
    )
    ema_means <- merge(ema_means, grpsum, by = "Subject")

4. Write out new dataframes (Skip this step now)

#write.csv(ema_items,"/Users/nikki/Desktop/Research/EMA/HealthyU_EMA_andGPS_onlyEMAdays_AllData.csv")
#write.csv(ema_means,"/Users/nikki/Desktop/Research/EMA/HealthyU_EMA_andGPS_onlyEMAdays_BWsubjectsData.csv")

Basic descriptions and visuals

Density plots

Black lines in the background represent each subject’s distribution; the single colored distribution is across the subjects’ means.
# Each plot overlays two density layers drawn from `allema`:
#   1. one unfilled curve per subject (group = subid) of the row-level variable;
#   2. one filled curve (group = 1) of a second variable.
# NOTE(review): layer 2 is drawn from the row-level merged data, so each
# subject-level mean appears once per row of that subject; the filled density
# is therefore weighted by the number of rows per subject. Confirm this is
# intended, or draw layer 2 from the subject-level table instead.

# Negative affect
avg_na <- ggplot(allema) +
  geom_density(mapping = aes(x = NA_Mean_new, group = subid), alpha = .1) +
  geom_density(mapping = aes(x = ema_avg_na, group = 1), fill = "turquoise", alpha = .5) +
  ylim(0, 0.11) +
  labs(
    title = "Individual and Group NA density",
    x = "Negative Affect",
    y = "Density"
  ) +
  theme_minimal()

# Positive affect
# NOTE(review): the per-subject curves use PA_Mean_tri, whereas the data
# preparation step averages PA_Mean_new into ema_avg_pa -- confirm which
# PA score is intended here.
avg_pa <- ggplot(allema) +
  geom_density(mapping = aes(x = PA_Mean_tri, group = subid), alpha = .1) +
  geom_density(mapping = aes(x = ema_avg_pa, group = 1), fill = "palegreen3", alpha = .5) +
  ylim(0, .11) +
  labs(
    title = "Individual and Group PA density",
    x = "Positive Affect",
    y = "Density"
  ) +
  theme_minimal()

# Total stress
avg_stress <- ggplot(allema) +
  geom_density(mapping = aes(x = allstress, group = subid), alpha = .1) +
  geom_density(mapping = aes(x = ema_all_str, group = 1), fill = "palevioletred2", alpha = .5) +
  ylim(0, .65) +
  labs(
    title = "Individual and Group Stress density",
    x = "Total Stress Rating",
    y = "Density"
  ) +
  theme_minimal()

# Social connectedness
avg_social <- ggplot(allema) +
  geom_density(mapping = aes(x = SocConnect, group = subid), alpha = .1) +
  geom_density(mapping = aes(x = ema_social_connect, group = 1), fill = "sienna1", alpha = .5) +
  ylim(0, .21) +
  labs(
    title = "Individual and Group Social density",
    x = "Social Connectedness",
    y = "Density"
  ) +
  theme_minimal()

# Entropy: both layers use entropy_sameday, so the filled curve is the pooled
# row-level density rather than a density of subject-level means.
avg_entropy <- ggplot(allema) +
  geom_density(mapping = aes(x = entropy_sameday, group = subid), alpha = .1) +
  geom_density(mapping = aes(x = entropy_sameday, group = 1), fill = "mediumpurple3", alpha = .5) +
  ylim(0, 1.85) +
  labs(
    title = "Individual and Group Entropy density",
    x = "RE",
    y = "Density"
  ) +
  theme_minimal()

#grid.arrange(avg_na, avg_pa, avg_stress,avg_social,avg_entropy,nrow = 5)
# Print each plot on its own.
avg_na

avg_pa

avg_stress

avg_social

avg_entropy

Categorical EMA item plots

Various types of stress, who subjects are with/their company, and different locations are examined:
# Subject-level subsets of ema_bw, selected by column position. Each subset
# must contain the columns named in the matching `gathercols` vector below
# (all_of() errors if any are missing); cat_ema2 and cat_ema3 also include
# column 22, which the plots use as `numResp`.
cat_ema1 <- ema_bw[c(1, 6:11)]
cat_ema2 <- ema_bw[c(1, 14:20, 22)]
cat_ema3 <- ema_bw[c(1, 22, 23:29)]
    #1 Stress domains: reshape to long (one row per subject x stress type).
    #colnames(cat_ema1)
    keycol <- "stresstype"
    valuecol <- "sum"
    gathercols <- c("ema_living_str", "ema_family_str", "ema_acad_str", "ema_peer_str",
                    "ema_job_str", "ema_other_str")
    # pivot_longer() replaces the deprecated gather_(); the resulting
    # key/value columns are the same, only the row order differs.
    cat_ema1_long <- pivot_longer(
      cat_ema1,
      cols = all_of(gathercols),
      names_to = keycol,
      values_to = valuecol
    )

    cat_ema1_long$stresstype <- as.factor(cat_ema1_long$stresstype)

    ggplot(data = cat_ema1_long, aes(group = stresstype)) +
      geom_boxplot(aes(x = stresstype, y = sum, fill = stresstype)) +
      geom_violin(aes(x = stresstype, y = sum, fill = stresstype), alpha = .2) +
      labs(title = "Types of Stress Ratings", x = "Stress Domains", y = "Average rating per Subject") +
      theme_minimal()

    #2 Company: per-subject counts divided by numResp.
    #colnames(cat_ema2)
    keycol <- "withtype"
    valuecol <- "sum"
    gathercols <- c("ema_withAcquant", "ema_withStranger", "ema_withCFriend", "ema_withPartner",
                    "ema_withFam", "ema_withCowork", "ema_withSelf")
    cat_ema2_long <- pivot_longer(
      cat_ema2,
      cols = all_of(gathercols),
      names_to = keycol,
      values_to = valuecol
    )

    cat_ema2_long$withtype <- as.factor(cat_ema2_long$withtype)

    ggplot(data = cat_ema2_long, aes(group = withtype)) +
      # Boxplot outliers are hidden because every point is drawn by geom_point.
      geom_boxplot(aes(x = withtype, y = (sum / numResp), fill = withtype), outlier.shape = NA) +
      geom_violin(aes(x = withtype, y = (sum / numResp), fill = withtype), alpha = .2) +
      geom_point(aes(x = withtype, y = (sum / numResp), fill = withtype), size = 2, shape = 6, alpha = .45) +
      labs(title = "Proportion of Occasions per Company type", x = "Company/Who they're with", y = "Proportion of Occasions per Subject") +
      theme_minimal()

    #3 Location: per-subject counts divided by numResp.
    # colnames(cat_ema3)
    keycol <- "Locationtype"
    valuecol <- "sum"
    gathercols <- c("ema_loc_restStore", "ema_loc_home", "ema_loc_work", "ema_loc_school",
                    "ema_loc_otherHome", "ema_loc_exerc", "ema_loc_other")
    cat_ema3_long <- pivot_longer(
      cat_ema3,
      cols = all_of(gathercols),
      names_to = keycol,
      values_to = valuecol
    )

    cat_ema3_long$Locationtype <- as.factor(cat_ema3_long$Locationtype)

    ggplot(data = cat_ema3_long, aes(group = Locationtype)) +
      geom_boxplot(aes(x = Locationtype, y = (sum / numResp), fill = Locationtype), outlier.shape = NA) +
      geom_violin(aes(x = Locationtype, y = (sum / numResp), fill = Locationtype), alpha = .2) +
      geom_point(aes(x = Locationtype, y = (sum / numResp), fill = Locationtype), size = 2, shape = 6, alpha = .45) +
      labs(title = "Proportion of Occasions per Location type", x = "Location", y = "Proportion of Occasions per Subject") +
      theme_minimal()

Descriptive statistics

# Subject-level columns to describe, by position: column 22 first, then
# columns 2 through 20, then column 33.
ema_bw_reduced <- ema_bw[c(22, 2:20, 33)]
# psych::describe() summary, rounded to two decimals.
means <- round(describe(ema_bw_reduced), digits = 2)
# Render the summary as a styled kable table.
kable_styling(kbl(means))
vars n mean sd median trimmed mad min max range skew kurtosis se
numResp 1 141 24.91 8.76 25.00 25.22 5.93 1.00 47.00 46.00 -0.31 0.72 0.74
ema_avg_na 2 141 34.99 14.41 36.04 35.00 14.03 3.25 74.81 71.56 0.02 -0.25 1.21
ema_avg_pa 3 141 54.02 10.05 55.02 54.20 8.63 24.31 87.49 63.18 -0.15 0.41 0.85
ema_stressReact 4 132 0.35 0.29 0.38 0.37 0.29 -0.68 0.99 1.66 -0.65 0.86 0.03
ema_all_str 5 137 14.66 5.41 13.52 14.21 5.72 6.33 32.22 25.89 0.73 0.01 0.46
ema_living_str 6 140 1.93 1.12 1.50 1.72 0.74 1.00 6.67 5.67 1.55 2.20 0.09
ema_family_str 7 141 2.05 1.08 1.64 1.90 0.90 1.00 5.89 4.89 1.07 0.49 0.09
ema_acad_str 8 141 4.15 1.27 4.14 4.15 1.28 1.00 7.00 6.00 0.02 -0.50 0.11
ema_peer_str 9 141 2.13 1.12 1.86 1.97 1.04 1.00 5.67 4.67 1.14 0.67 0.09
ema_job_str 10 140 2.24 1.32 1.71 2.05 0.91 1.00 7.00 6.00 1.15 0.60 0.11
ema_other_str 11 140 2.29 1.21 1.94 2.13 1.18 1.00 5.61 4.61 0.97 0.08 0.10
ema_social_connect 12 141 50.66 14.76 50.58 51.20 14.31 6.85 92.21 85.36 -0.36 0.49 1.24
ema_social_desire 13 141 47.49 15.54 47.55 47.88 13.44 4.78 98.22 93.44 -0.08 0.84 1.31
ema_withAcquant 14 141 3.35 3.24 3.00 2.94 2.97 0.00 18.00 18.00 1.29 2.26 0.27
ema_withStranger 15 141 1.71 2.53 1.00 1.23 1.48 0.00 17.00 17.00 2.95 12.82 0.21
ema_withCFriend 16 141 6.70 4.98 6.00 6.31 5.93 0.00 23.00 23.00 0.66 -0.04 0.42
ema_withPartner 17 141 0.67 1.98 0.00 0.22 0.00 0.00 19.00 19.00 6.18 50.08 0.17
ema_withFam 18 141 2.43 2.71 2.00 1.96 2.97 0.00 16.00 16.00 1.82 4.52 0.23
ema_withCowork 19 141 0.50 1.07 0.00 0.26 0.00 0.00 8.00 8.00 3.46 17.12 0.09
ema_withSelf 20 141 10.50 6.14 10.00 10.07 5.93 0.00 32.00 32.00 0.82 1.05 0.52
ema_noConvo 21 141 11.42 7.76 11.00 10.81 7.41 0.00 36.00 36.00 0.71 0.38 0.65

Reliability

# Report raw alpha (4 decimals) for the three item sets. a1, a2 and a3 are
# created outside this excerpt.
# NOTE(review): the `$total$raw_alpha` access suggests psych::alpha() results
# -- confirm where a1/a2/a3 are computed.
print(paste0("Stress Domains Alpha = ", round(a1$total$raw_alpha, digits = 4)))
## [1] "Stress Domains Alpha = 0.8634"
print(paste0("Company Domains Alpha = ", round(a2$total$raw_alpha, digits = 4)))
## [1] "Company Domains Alpha = 0.1922"
print(paste0("Location Domains Alpha = ", round(a3$total$raw_alpha, digits = 4)))
## [1] "Location Domains Alpha = 0.0103"

Correlations and heat maps

 # Correlation heat maps. r1, r2, r3 and p.mat1 are created outside this
 # excerpt. Only the first plot receives a p-value matrix, so only it can
 # mark cells above sig.level = 0.05 with the `pch` symbol; for the other two
 # p.mat is NULL.
 # hc.method is set to "complete" in all three calls: the previous value
 # "pairwise" in the first call is not an hclust() agglomeration method. It
 # had no effect while hc.order = FALSE, but would fail once ordering is
 # switched on.
 ggcorrplot(r1, method = "square", ggtheme = ggplot2::theme_minimal, title = "Correlations between subject averages of EMA variables", show.legend = TRUE, legend.title = "Corr", show.diag = FALSE,
  colors = c("blue1", "gray90", "violetred3"), outline.color = "black",
  hc.order = FALSE, hc.method = "complete", lab = TRUE,
  lab_col = "black", lab_size = 2, p.mat = p.mat1, sig.level = 0.05,
  insig = "pch", pch = 4, pch.col = "black",
  pch.cex = 5, tl.cex = 12, tl.col = "black", tl.srt = 45,
  digits = 2)

 ggcorrplot(r2, method = "square", ggtheme = ggplot2::theme_minimal, title = "Correlations between subject averages of Stress domains", show.legend = TRUE, legend.title = "Corr", show.diag = FALSE,
  colors = c("blue1", "gray90", "violetred3"), outline.color = "gray",
  hc.order = FALSE, hc.method = "complete", lab = TRUE,
  lab_col = "black", lab_size = 2, p.mat = NULL, sig.level = 0.05,
  insig = "pch", pch = 4, pch.col = "black",
  pch.cex = 5, tl.cex = 12, tl.col = "black", tl.srt = 45,
  digits = 2)

 # NOTE(review): this title is identical to the first plot's -- confirm what
 # r3 contains and whether it needs its own title.
 ggcorrplot(r3, method = "square", ggtheme = ggplot2::theme_minimal, title = "Correlations between subject averages of EMA variables", show.legend = TRUE, legend.title = "Corr", show.diag = FALSE,
  colors = c("blue1", "gray90", "violetred3"), outline.color = "black",
  hc.order = FALSE, hc.method = "complete", lab = TRUE,
  lab_col = "black", lab_size = 2, p.mat = NULL, sig.level = 0.05,
  insig = "pch", pch = 4, pch.col = "black",
  pch.cex = 5, tl.cex = 12, tl.col = "black", tl.srt = 45,
  digits = 2)