expt2_task2_full

Data Wrangling

data_path <- "/Users/apple/Desktop/JHU/Energy_Data_Analysis/main_expt2_task2_data_raw"
mean_dif <- read.csv("/Users/apple/Desktop/JHU/Energy_Data_Analysis/mean_dif.csv")
file_paths <- list.files(path = data_path, full.names = TRUE)
d_task2 <- tidytable::map_df(file_paths, ~ read.csv(.x) %>% 
              mutate(filename = basename(.x))) 

d_task2$response <- as.numeric(d_task2$response)
Warning: NAs introduced by coercion
d_task2_filtered <- d_task2 |>
  dplyr::filter(task %in% c("energy-compcheck","diff-compcheck","energy-rating")) %>% #subset to relevant trials 
  dplyr::group_by(filename) %>%
  mutate(correct = case_when(
    activity == "att_middle" & response > 48 & response < 52 ~ "true",
    activity == "att_right" & as.numeric(response) == 100 ~ "true",
    activity %in% c("att_middle", "att_right") ~ "false",
    TRUE ~ correct  
  )) %>%
    dplyr::filter(sum(correct == "false",na.rm = TRUE) < 2) #remove 2+ fails in comp+att checks

length(unique(d_task2_filtered$filename)) # n=84
[1] 84
#cleaning up to test trials + energy level
d_task2_clean <- d_task2_filtered %>%
  filter(!task %in% c("energy-compcheck","diff-compcheck") & !activity %in% c("att_right","att_middle")) %>% 
  mutate(expressed_diff = case_when(
    str_detect(stimulus, "so hard") ~ "hard",
    str_detect(stimulus, "so easy") ~ "easy",
    TRUE ~ NA
  ))

#create pseudo id
d_task2_clean <- d_task2_clean %>%
  group_by(filename) %>%
  mutate(subjectID = sprintf("sub%03d", cur_group_id())) %>%
  ungroup() #add pseudo id

d_task2_clean$subjectID <- as.factor(d_task2_clean$subjectID)
#adding act_type and act_diff (continuous & categorical)
d_task2_labeled <- d_task2_clean %>%
  mutate(act_type = case_when(
    str_detect(activity, "run") | str_detect(activity, "clean") | str_detect(activity, "hike") | str_detect(activity, "jump") | str_detect(activity, "climb") | str_detect(activity, "swim") ~ "physical",
    str_detect(activity, "handwriting") | str_detect(activity, "math") | str_detect(activity, "violin") | str_detect(activity, "homework") | str_detect(activity, "flashcards") | str_detect(activity, "alphabet") ~ "mental",
    TRUE ~ "restorative"))

d_task2_labeled <- d_task2_labeled %>%
  mutate(activity = case_when(
    activity %in% c("do their homework") ~ "do homework",
    activity %in% c("listen to their favorite stories") ~ "listen to favorite stories",
    activity %in% c("watch their favorite show on the couch") ~ "watch favorite show on the couch",
    TRUE ~ activity))

d_task2_merged <- d_task2_labeled |>
  dplyr::left_join(mean_dif, by = "activity")

d_task2_merged$act_mean_diff <- d_task2_merged$act_diff

d_task2_merged <- d_task2_merged %>%
  mutate(act_diff_level = case_when(
    str_detect(stimulus, "run") | str_detect(stimulus, "climb") | str_detect(stimulus, "math") | str_detect(stimulus, "homework") ~ "hard",
    str_detect(stimulus, "hike") | str_detect(stimulus, "house") | str_detect(stimulus, "violin") | str_detect(stimulus, "handwriting") ~ "medium",
    str_detect(stimulus, "jump") | str_detect(stimulus, "swim") | str_detect(stimulus, "alphabet") | str_detect(stimulus, "sounding") ~ "easy",
    TRUE ~ NA 
  ))
#Removing outliers
library(olsrr) 
library(influence.ME)

d_task2_phys_ment <- d_task2_merged |>
  filter(act_type=="physical"|act_type=="mental")
  
infl <- influence(lmer(formula = response ~ expressed_diff + (1 |subjectID), data=d_task2_phys_ment), group = "subjectID")
cooksd <- cooks.distance(infl, sort=TRUE)

plot(infl, which = "cook",
     cutoff=4/84, sort=TRUE,) # 22 outliers at threshold 4/N 

d_task2_after_exclusion <- d_task2_merged %>%
  filter(!subjectID %in% rownames(cooksd)[cooksd > 4/84])

length(unique(d_task2_after_exclusion$filename)) # n=62 in final sample
[1] 62

Outlier inspection

d_task2_outliers <- d_task2_merged %>%
  filter(subjectID %in% rownames(cooksd)[cooksd > 4/81])

d_task2_outliers$act_type <- factor(d_task2_outliers$act_type, levels = c("physical", "mental", "restorative"))
d_task2_outliers$act_diff_level <- factor(d_task2_outliers$act_diff_level, levels = c("easy", "medium", "hard"))
d_task2_outliers$expressed_diff <- factor(d_task2_outliers$expressed_diff)

ggplot(d_task2_outliers, 
       aes(x = act_type, y=response,fill=act_diff_level))+ 
  stat_boxplot(geom='errorbar')+
  geom_boxplot() +
  facet_wrap(~expressed_diff) +
 # stat_summary(fun.y=mean, geom="point", size=1)+
  theme_minimal()+
  scale_fill_brewer(palette="Oranges")+
  labs(title="Outlier Data: Inferred Energy Given Expressed Difficulty",x="Expressed Difficulty",y="Inferred Energy",fill="difficulty level")+
  theme(plot.title = element_text(size=15))

A lot of variability/noise

Visualization

d_task2_after_exclusion$act_type <- factor(d_task2_after_exclusion$act_type, levels = c("physical", "mental", "restorative"))
d_task2_after_exclusion$act_diff_level <- factor(d_task2_after_exclusion$act_diff_level, levels = c("easy", "medium", "hard"))
d_task2_after_exclusion$expressed_diff <- factor(d_task2_after_exclusion$expressed_diff)

#ggplot(subset(d_task2_after_exclusion, act_type!= "restorative"), 
ggplot(d_task2_after_exclusion,
       aes(x = act_type, y=response,fill=act_diff_level))+ 
  stat_boxplot(geom='errorbar')+
  geom_boxplot() +
  facet_wrap(~expressed_diff) +
 # stat_summary(fun.y=mean, geom="point", size=1)+
  theme_minimal()+
  scale_fill_brewer(palette="Oranges")+
  labs(title="Inferred Energy Given Expressed Difficulty",x="Expressed Difficulty",y="Inferred Energy",fill="difficulty level")+
  theme(plot.title = element_text(size=15))

we see an effect of expressed difficulty - adults infer people are overall energetic when they expressed ease doing something and tired when they expressed difficulty doing something.

also differ by activity type -

  • when subjective cost of activities is low, adults are likely to infer less energy for mental activities than physical activities.

  • when subjective cost of activities is high, adults are likely to infer higher energy for mental activities than physical activities. It seems that subjective cost more directly maps onto adults’ energy inferences for physical activities than mental activities.

It is unclear from the visualization whether difficulty level of activities has additional effect on adults’ energy inference.

Modeling

library(lmerTest)

levels(d_task2_after_exclusion$expressed_diff)
[1] "easy" "hard"
levels(d_task2_after_exclusion$act_diff_level)
[1] "easy"   "medium" "hard"  
levels(d_task2_after_exclusion$act_type)
[1] "physical"    "mental"      "restorative"
d_task2_after_exclusion_physment <- d_task2_after_exclusion |>
  filter(act_type!="restorative")

# Primary H
model1 <- lmer(data=d_task2_after_exclusion_physment, formula=response ~ expressed_diff + (1|subjectID))
summary(model1)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: response ~ expressed_diff + (1 | subjectID)
   Data: d_task2_after_exclusion_physment

REML criterion at convergence: 6483.4

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.7494 -0.6490  0.0767  0.5988  4.1869 

Random effects:
 Groups    Name        Variance Std.Dev.
 subjectID (Intercept)  29.78    5.457  
 Residual              338.56   18.400  
Number of obs: 744, groups:  subjectID, 62

Fixed effects:
                   Estimate Std. Error      df t value Pr(>|t|)    
(Intercept)          86.509      1.173 125.230   73.73   <2e-16 ***
expressed_diffhard  -61.613      1.377 723.466  -44.75   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr)
exprssd_dff -0.566

people infer different energy level depending on the agents’ expressed difficulty (harder->less energy) for physical and mental energy.

# Exploratory Q1 - does energy judgment change depending on activity type and/or activity difficulty, on top of expressed diff, for all activities? 
model2 <- lmer(data=d_task2_after_exclusion, formula=response ~ expressed_diff + act_type + act_mean_diff+ (1|subjectID))
summary(model2) # NO
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: response ~ expressed_diff + act_type + act_mean_diff + (1 | subjectID)
   Data: d_task2_after_exclusion

REML criterion at convergence: 10259.3

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.5885 -0.6578 -0.0552  0.6556  3.1320 

Random effects:
 Groups    Name        Variance Std.Dev.
 subjectID (Intercept)  15.41    3.925  
 Residual              568.34   23.840  
Number of obs: 1116, groups:  subjectID, 62

Fixed effects:
                      Estimate Std. Error         df t value Pr(>|t|)    
(Intercept)           88.99658    4.03301 1109.75046  22.067   <2e-16 ***
expressed_diffhard   -52.11040    1.44306 1102.61821 -36.111   <2e-16 ***
act_typemental        -2.87692    1.79054 1050.07083  -1.607   0.1084    
act_typerestorative   -6.00492    3.28401 1050.31555  -1.829   0.0678 .  
act_mean_diff         -0.09062    0.05753 1050.20724  -1.575   0.1155    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) exprs_ act_typm act_typr
exprssd_dff -0.135                         
act_typmntl -0.415  0.006                  
act_typrstr -0.896 -0.058  0.442           
act_men_dff -0.927 -0.043  0.216    0.846  

Energy judgment does not change depending on activity type and/or activity mean difficulty, on top of expressed diff.

# Exploratory Q2 - does effect of expressed diff on energy judgment change depending on activity type, taking into account mean difficulty?
model3 <- lmer(data=d_task2_after_exclusion, formula=response ~ expressed_diff*act_type + act_mean_diff + (1|subjectID))

plot(allEffects(model3))

summary(model3) 
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: response ~ expressed_diff * act_type + act_mean_diff + (1 | subjectID)
   Data: d_task2_after_exclusion

REML criterion at convergence: 10127.3

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.2530 -0.6213 -0.0013  0.4960  3.5408 

Random effects:
 Groups    Name        Variance Std.Dev.
 subjectID (Intercept)  10.49    3.239  
 Residual              511.68   22.620  
Number of obs: 1116, groups:  subjectID, 62

Fixed effects:
                                         Estimate Std. Error         df t value
(Intercept)                              96.55475    3.88489 1106.17909  24.854
expressed_diffhard                      -71.23335    2.36892 1100.67386 -30.070
act_typemental                          -12.09856    2.32772 1083.22723  -5.198
act_typerestorative                     -24.45691    3.51446 1067.81275  -6.959
act_mean_diff                            -0.06241    0.05474 1048.43401  -1.140
expressed_diffhard:act_typemental        19.13293    3.35515 1105.70114   5.703
expressed_diffhard:act_typerestorative   38.38212    3.35468 1107.53513  11.441
                                       Pr(>|t|)    
(Intercept)                             < 2e-16 ***
expressed_diffhard                      < 2e-16 ***
act_typemental                         2.41e-07 ***
act_typerestorative                    5.97e-12 ***
act_mean_diff                             0.255    
expressed_diffhard:act_typemental      1.51e-08 ***
expressed_diffhard:act_typerestorative  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
                       (Intr) exprs_ act_typm act_typr act_m_
exprssd_dff            -0.224                                
act_typmntl            -0.396  0.489                         
act_typrstr            -0.857  0.270  0.411                  
act_men_dff            -0.899 -0.082  0.106    0.731         
exprssd_dffhrd:ct_typm  0.143 -0.708 -0.684   -0.179    0.075
exprssd_dffhrd:ct_typr  0.170 -0.707 -0.348   -0.459    0.045
                       exprssd_dffhrd:ct_typm
exprssd_dff                                  
act_typmntl                                  
act_typrstr                                  
act_men_dff                                  
exprssd_dffhrd:ct_typm                       
exprssd_dffhrd:ct_typr  0.501                

model 3 results:

easy hard
physical 96.6 25.3
mental 84.5 32.4
restorative 72.1 39.2

Effect of expressed diff on energy judgment changes depending on activity type, such that people infer 19.1 points higher energy when someone find mental activities hard, than when they find physical activities hard. People also infer 38.4 points higher energy when someone expresssed restorative activities are hard, than when they say physical activities are hard.

# Exploratory Q3 - does effect of expressed diff on energy judgment change depending on activity type and difficulty level (phys and ment only)?
model4 <- lmer(data=d_task2_after_exclusion_physment, formula=response ~ expressed_diff*act_diff_level + (1|subjectID))
model5 <- lmer(data=d_task2_after_exclusion_physment, formula=response ~ expressed_diff*act_type + (1|subjectID))
model6 <- lmer(data=d_task2_after_exclusion_physment, formula=response ~ expressed_diff*act_type*act_diff_level + (1|subjectID))

anova(model4,model5,model6) 
refitting model(s) with ML (instead of REML)
Data: d_task2_after_exclusion_physment
Models:
model5: response ~ expressed_diff * act_type + (1 | subjectID)
model4: response ~ expressed_diff * act_diff_level + (1 | subjectID)
model6: response ~ expressed_diff * act_type * act_diff_level + (1 | subjectID)
       npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
model5    6 6444.5 6472.2 -3216.2   6432.5                         
model4    8 6500.1 6537.0 -3242.0   6484.1  0.000  2          1    
model6   14 6452.0 6516.6 -3212.0   6424.0 60.054  6  4.388e-11 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(allEffects(model6)) #model 6's better but no significant interaction term

summary(model6)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: response ~ expressed_diff * act_type * act_diff_level + (1 |  
    subjectID)
   Data: d_task2_after_exclusion_physment

REML criterion at convergence: 6381.5

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.4626 -0.5914 -0.0311  0.4861  4.5362 

Random effects:
 Groups    Name        Variance Std.Dev.
 subjectID (Intercept)  27.41    5.236  
 Residual              315.22   17.754  
Number of obs: 744, groups:  subjectID, 62

Fixed effects:
                                                       Estimate Std. Error
(Intercept)                                             93.5896     2.1871
expressed_diffhard                                     -72.8821     3.3263
act_typemental                                         -12.4405     3.1148
act_diff_levelmedium                                    -1.5650     3.1420
act_diff_levelhard                                      -1.9630     3.1600
expressed_diffhard:act_typemental                       21.2651     4.6515
expressed_diffhard:act_diff_levelmedium                  1.2753     4.6510
expressed_diffhard:act_diff_levelhard                    3.5285     4.6575
act_typemental:act_diff_levelmedium                      2.5669     4.4452
act_typemental:act_diff_levelhard                       -0.5847     4.4956
expressed_diffhard:act_typemental:act_diff_levelmedium   0.9319     6.5637
expressed_diffhard:act_typemental:act_diff_levelhard    -5.9890     6.5249
                                                             df t value
(Intercept)                                            623.0098  42.792
expressed_diffhard                                     718.6052 -21.911
act_typemental                                         694.9238  -3.994
act_diff_levelmedium                                   693.2524  -0.498
act_diff_levelhard                                     695.1524  -0.621
expressed_diffhard:act_typemental                      715.5607   4.572
expressed_diffhard:act_diff_levelmedium                714.2498   0.274
expressed_diffhard:act_diff_levelhard                  715.5534   0.758
act_typemental:act_diff_levelmedium                    694.2412   0.577
act_typemental:act_diff_levelhard                      690.5150  -0.130
expressed_diffhard:act_typemental:act_diff_levelmedium 714.9136   0.142
expressed_diffhard:act_typemental:act_diff_levelhard   708.3715  -0.918
                                                       Pr(>|t|)    
(Intercept)                                             < 2e-16 ***
expressed_diffhard                                      < 2e-16 ***
act_typemental                                         7.19e-05 ***
act_diff_levelmedium                                      0.619    
act_diff_levelhard                                        0.535    
expressed_diffhard:act_typemental                      5.70e-06 ***
expressed_diffhard:act_diff_levelmedium                   0.784    
expressed_diffhard:act_diff_levelhard                     0.449    
act_typemental:act_diff_levelmedium                       0.564    
act_typemental:act_diff_levelhard                         0.897    
expressed_diffhard:act_typemental:act_diff_levelmedium    0.887    
expressed_diffhard:act_typemental:act_diff_levelhard      0.359    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
                                      (Intr) exprs_ act_ty act_dff_lvlm
exprssd_dff                           -0.613                           
act_typmntl                           -0.637  0.429                    
act_dff_lvlm                          -0.633  0.429  0.443             
act_dff_lvlh                          -0.628  0.424  0.438  0.439      
exprssd_d:_                            0.438 -0.714 -0.685 -0.304      
exprssd_dffhrd:ct_dff_lvlm             0.440 -0.717 -0.306 -0.690      
exprssd_dffhrd:ct_dff_lvlh             0.438 -0.714 -0.304 -0.308      
act_typmntl:ct_dff_lvlm                0.446 -0.300 -0.700 -0.705      
act_typmntl:ct_dff_lvlh                0.441 -0.298 -0.691 -0.307      
exprssd_dffhrd:ct_typmntl:ct_dff_lvlm -0.309  0.505  0.485  0.486      
exprssd_dffhrd:ct_typmntl:ct_dff_lvlh -0.313  0.510  0.485  0.218      
                                      act_dff_lvlh exp_:_
exprssd_dff                                              
act_typmntl                                              
act_dff_lvlm                                             
act_dff_lvlh                                             
exprssd_d:_                           -0.300             
exprssd_dffhrd:ct_dff_lvlm            -0.306        0.510
exprssd_dffhrd:ct_dff_lvlh            -0.693        0.506
act_typmntl:ct_dff_lvlm               -0.307        0.479
act_typmntl:ct_dff_lvlh               -0.701        0.472
exprssd_dffhrd:ct_typmntl:ct_dff_lvlm  0.212       -0.708
exprssd_dffhrd:ct_typmntl:ct_dff_lvlh  0.492       -0.710
                                      exprssd_dffhrd:ct_dff_lvlm
exprssd_dff                                                     
act_typmntl                                                     
act_dff_lvlm                                                    
act_dff_lvlh                                                    
exprssd_d:_                                                     
exprssd_dffhrd:ct_dff_lvlm                                      
exprssd_dffhrd:ct_dff_lvlh             0.514                    
act_typmntl:ct_dff_lvlm                0.485                    
act_typmntl:ct_dff_lvlh                0.214                    
exprssd_dffhrd:ct_typmntl:ct_dff_lvlm -0.704                    
exprssd_dffhrd:ct_typmntl:ct_dff_lvlh -0.365                    
                                      exprssd_dffhrd:ct_dff_lvlh
exprssd_dff                                                     
act_typmntl                                                     
act_dff_lvlm                                                    
act_dff_lvlh                                                    
exprssd_d:_                                                     
exprssd_dffhrd:ct_dff_lvlm                                      
exprssd_dffhrd:ct_dff_lvlh                                      
act_typmntl:ct_dff_lvlm                0.213                    
act_typmntl:ct_dff_lvlh                0.485                    
exprssd_dffhrd:ct_typmntl:ct_dff_lvlm -0.358                    
exprssd_dffhrd:ct_typmntl:ct_dff_lvlh -0.711                    
                                      act_typmntl:ct_dff_lvlm
exprssd_dff                                                  
act_typmntl                                                  
act_dff_lvlm                                                 
act_dff_lvlh                                                 
exprssd_d:_                                                  
exprssd_dffhrd:ct_dff_lvlm                                   
exprssd_dffhrd:ct_dff_lvlh                                   
act_typmntl:ct_dff_lvlm                                      
act_typmntl:ct_dff_lvlh                0.485                 
exprssd_dffhrd:ct_typmntl:ct_dff_lvlm -0.692                 
exprssd_dffhrd:ct_typmntl:ct_dff_lvlh -0.341                 
                                      act_typmntl:ct_dff_lvlh
exprssd_dff                                                  
act_typmntl                                                  
act_dff_lvlm                                                 
act_dff_lvlh                                                 
exprssd_d:_                                                  
exprssd_dffhrd:ct_dff_lvlm                                   
exprssd_dffhrd:ct_dff_lvlh                                   
act_typmntl:ct_dff_lvlm                                      
act_typmntl:ct_dff_lvlh                                      
exprssd_dffhrd:ct_typmntl:ct_dff_lvlm -0.335                 
exprssd_dffhrd:ct_typmntl:ct_dff_lvlh -0.701                 
                                      exprssd_dffhrd:ct_typmntl:ct_dff_lvlm
exprssd_dff                                                                
act_typmntl                                                                
act_dff_lvlm                                                               
act_dff_lvlh                                                               
exprssd_d:_                                                                
exprssd_dffhrd:ct_dff_lvlm                                                 
exprssd_dffhrd:ct_dff_lvlh                                                 
act_typmntl:ct_dff_lvlm                                                    
act_typmntl:ct_dff_lvlh                                                    
exprssd_dffhrd:ct_typmntl:ct_dff_lvlm                                      
exprssd_dffhrd:ct_typmntl:ct_dff_lvlh  0.504                               
car::vif(lm(response ~ expressed_diff*act_type*act_diff, data=d_task2_after_exclusion),type="predictor") #no multicollinearity issue among predictors
GVIFs computed for predictors
               GVIF Df GVIF^(1/(2*Df))           Interacts With
expressed_diff    1 11               1       act_type, act_diff
act_type          1 11               1 expressed_diff, act_diff
act_diff          1 11               1 expressed_diff, act_type
               Other Predictors
expressed_diff             --  
act_type                   --  
act_diff                   --  

I don’t know which model to choose.