Data Wrangling
data_path <- "/Users/apple/Desktop/JHU/Energy_Data_Analysis/main_expt2_task2_data_raw"
mean_dif <- read.csv ("/Users/apple/Desktop/JHU/Energy_Data_Analysis/mean_dif.csv" )
file_paths <- list.files (path = data_path, full.names = TRUE )
d_task2 <- tidytable:: map_df (file_paths, ~ read.csv (.x) %>%
mutate (filename = basename (.x)))
d_task2$ response <- as.numeric (d_task2$ response)
Warning: NAs introduced by coercion
d_task2_filtered <- d_task2 |>
dplyr:: filter (task %in% c ("energy-compcheck" ,"diff-compcheck" ,"energy-rating" )) %>% #subset to relevant trials
dplyr:: group_by (filename) %>%
mutate (correct = case_when (
activity == "att_middle" & response > 48 & response < 52 ~ "true" ,
activity == "att_right" & as.numeric (response) == 100 ~ "true" ,
activity %in% c ("att_middle" , "att_right" ) ~ "false" ,
TRUE ~ correct
)) %>%
dplyr:: filter (sum (correct == "false" ,na.rm = TRUE ) < 2 ) #remove 2+ fails in comp+att checks
length (unique (d_task2_filtered$ filename)) # n=84
#cleaning up to test trials + energy level
d_task2_clean <- d_task2_filtered %>%
filter (! task %in% c ("energy-compcheck" ,"diff-compcheck" ) & ! activity %in% c ("att_right" ,"att_middle" )) %>%
mutate (expressed_diff = case_when (
str_detect (stimulus, "so hard" ) ~ "hard" ,
str_detect (stimulus, "so easy" ) ~ "easy" ,
TRUE ~ NA
))
#create pseudo id
d_task2_clean <- d_task2_clean %>%
group_by (filename) %>%
mutate (subjectID = sprintf ("sub%03d" , cur_group_id ())) %>%
ungroup () #add pseudo id
d_task2_clean$ subjectID <- as.factor (d_task2_clean$ subjectID)
#adding act_type and act_diff (continuous & categorical)
d_task2_labeled <- d_task2_clean %>%
mutate (act_type = case_when (
str_detect (activity, "run" ) | str_detect (activity, "clean" ) | str_detect (activity, "hike" ) | str_detect (activity, "jump" ) | str_detect (activity, "climb" ) | str_detect (activity, "swim" ) ~ "physical" ,
str_detect (activity, "handwriting" ) | str_detect (activity, "math" ) | str_detect (activity, "violin" ) | str_detect (activity, "homework" ) | str_detect (activity, "flashcards" ) | str_detect (activity, "alphabet" ) ~ "mental" ,
TRUE ~ "restorative" ))
d_task2_labeled <- d_task2_labeled %>%
mutate (activity = case_when (
activity %in% c ("do their homework" ) ~ "do homework" ,
activity %in% c ("listen to their favorite stories" ) ~ "listen to favorite stories" ,
activity %in% c ("watch their favorite show on the couch" ) ~ "watch favorite show on the couch" ,
TRUE ~ activity))
d_task2_merged <- d_task2_labeled |>
dplyr:: left_join (mean_dif, by = "activity" )
d_task2_merged$ act_mean_diff <- d_task2_merged$ act_diff
d_task2_merged <- d_task2_merged %>%
mutate (act_diff_level = case_when (
str_detect (stimulus, "run" ) | str_detect (stimulus, "climb" ) | str_detect (stimulus, "math" ) | str_detect (stimulus, "homework" ) ~ "hard" ,
str_detect (stimulus, "hike" ) | str_detect (stimulus, "house" ) | str_detect (stimulus, "violin" ) | str_detect (stimulus, "handwriting" ) ~ "medium" ,
str_detect (stimulus, "jump" ) | str_detect (stimulus, "swim" ) | str_detect (stimulus, "alphabet" ) | str_detect (stimulus, "sounding" ) ~ "easy" ,
TRUE ~ NA
))
#Removing outliers
library (olsrr)
library (influence.ME)
d_task2_phys_ment <- d_task2_merged |>
filter (act_type== "physical" | act_type== "mental" )
infl <- influence (lmer (formula = response ~ expressed_diff + (1 | subjectID), data= d_task2_phys_ment), group = "subjectID" )
cooksd <- cooks.distance (infl, sort= TRUE )
plot (infl, which = "cook" ,
cutoff= 4 / 84 , sort= TRUE ,) # 22 outliers at threshold 4/N
d_task2_after_exclusion <- d_task2_merged %>%
filter (! subjectID %in% rownames (cooksd)[cooksd > 4 / 84 ])
length (unique (d_task2_after_exclusion$ filename)) # n=62 in final sample
Outlier inspection
d_task2_outliers <- d_task2_merged %>%
filter (subjectID %in% rownames (cooksd)[cooksd > 4 / 81 ])
d_task2_outliers$ act_type <- factor (d_task2_outliers$ act_type, levels = c ("physical" , "mental" , "restorative" ))
d_task2_outliers$ act_diff_level <- factor (d_task2_outliers$ act_diff_level, levels = c ("easy" , "medium" , "hard" ))
d_task2_outliers$ expressed_diff <- factor (d_task2_outliers$ expressed_diff)
ggplot (d_task2_outliers,
aes (x = act_type, y= response,fill= act_diff_level))+
stat_boxplot (geom= 'errorbar' )+
geom_boxplot () +
facet_wrap (~ expressed_diff) +
# stat_summary(fun.y=mean, geom="point", size=1)+
theme_minimal ()+
scale_fill_brewer (palette= "Oranges" )+
labs (title= "Outlier Data: Inferred Energy Given Expressed Difficulty" ,x= "Expressed Difficulty" ,y= "Inferred Energy" ,fill= "difficulty level" )+
theme (plot.title = element_text (size= 15 ))
A lot of variability/noise
Visualization
d_task2_after_exclusion$ act_type <- factor (d_task2_after_exclusion$ act_type, levels = c ("physical" , "mental" , "restorative" ))
d_task2_after_exclusion$ act_diff_level <- factor (d_task2_after_exclusion$ act_diff_level, levels = c ("easy" , "medium" , "hard" ))
d_task2_after_exclusion$ expressed_diff <- factor (d_task2_after_exclusion$ expressed_diff)
#ggplot(subset(d_task2_after_exclusion, act_type!= "restorative"),
ggplot (d_task2_after_exclusion,
aes (x = act_type, y= response,fill= act_diff_level))+
stat_boxplot (geom= 'errorbar' )+
geom_boxplot () +
facet_wrap (~ expressed_diff) +
# stat_summary(fun.y=mean, geom="point", size=1)+
theme_minimal ()+
scale_fill_brewer (palette= "Oranges" )+
labs (title= "Inferred Energy Given Expressed Difficulty" ,x= "Expressed Difficulty" ,y= "Inferred Energy" ,fill= "difficulty level" )+
theme (plot.title = element_text (size= 15 ))
we see an effect of expressed difficulty - adults infer people are overall energetic when they expressed ease doing something and tired when they expressed difficulty doing something.
also differ by activity type -
when subjective cost of activities is low, adults are likely to infer less energy for mental activities than physical activities.
when subjective cost of activities is high, adults are likely to infer higher energy for mental activities than physical activities. It seems that subjective cost more directly maps onto adults’ energy inferences for physical activities than mental activities.
It is unclear from the visualization whether difficulty level of activities has additional effect on adults’ energy inference.
Modeling
library (lmerTest)
levels (d_task2_after_exclusion$ expressed_diff)
levels (d_task2_after_exclusion$ act_diff_level)
[1] "easy" "medium" "hard"
levels (d_task2_after_exclusion$ act_type)
[1] "physical" "mental" "restorative"
d_task2_after_exclusion_physment <- d_task2_after_exclusion |>
filter (act_type!= "restorative" )
# Primary H
model1 <- lmer (data= d_task2_after_exclusion_physment, formula= response ~ expressed_diff + (1 | subjectID))
summary (model1)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: response ~ expressed_diff + (1 | subjectID)
Data: d_task2_after_exclusion_physment
REML criterion at convergence: 6483.4
Scaled residuals:
Min 1Q Median 3Q Max
-3.7494 -0.6490 0.0767 0.5988 4.1869
Random effects:
Groups Name Variance Std.Dev.
subjectID (Intercept) 29.78 5.457
Residual 338.56 18.400
Number of obs: 744, groups: subjectID, 62
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 86.509 1.173 125.230 73.73 <2e-16 ***
expressed_diffhard -61.613 1.377 723.466 -44.75 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr)
exprssd_dff -0.566
people infer different energy level depending on the agents’ expressed difficulty (harder->less energy) for physical and mental energy.
# Exploratory Q1 - does energy judgment change depending on activity type and/or activity difficulty, on top of expressed diff, for all activities?
model2 <- lmer (data= d_task2_after_exclusion, formula= response ~ expressed_diff + act_type + act_mean_diff+ (1 | subjectID))
summary (model2) # NO
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: response ~ expressed_diff + act_type + act_mean_diff + (1 | subjectID)
Data: d_task2_after_exclusion
REML criterion at convergence: 10259.3
Scaled residuals:
Min 1Q Median 3Q Max
-3.5885 -0.6578 -0.0552 0.6556 3.1320
Random effects:
Groups Name Variance Std.Dev.
subjectID (Intercept) 15.41 3.925
Residual 568.34 23.840
Number of obs: 1116, groups: subjectID, 62
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 88.99658 4.03301 1109.75046 22.067 <2e-16 ***
expressed_diffhard -52.11040 1.44306 1102.61821 -36.111 <2e-16 ***
act_typemental -2.87692 1.79054 1050.07083 -1.607 0.1084
act_typerestorative -6.00492 3.28401 1050.31555 -1.829 0.0678 .
act_mean_diff -0.09062 0.05753 1050.20724 -1.575 0.1155
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) exprs_ act_typm act_typr
exprssd_dff -0.135
act_typmntl -0.415 0.006
act_typrstr -0.896 -0.058 0.442
act_men_dff -0.927 -0.043 0.216 0.846
Energy judgment does not change depending on activity type and/or activity mean difficulty, on top of expressed diff.
# Exploratory Q2 - does effect of expressed diff on energy judgment change depending on activity type, taking into account mean difficulty?
model3 <- lmer (data= d_task2_after_exclusion, formula= response ~ expressed_diff* act_type + act_mean_diff + (1 | subjectID))
plot (allEffects (model3))
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: response ~ expressed_diff * act_type + act_mean_diff + (1 | subjectID)
Data: d_task2_after_exclusion
REML criterion at convergence: 10127.3
Scaled residuals:
Min 1Q Median 3Q Max
-3.2530 -0.6213 -0.0013 0.4960 3.5408
Random effects:
Groups Name Variance Std.Dev.
subjectID (Intercept) 10.49 3.239
Residual 511.68 22.620
Number of obs: 1116, groups: subjectID, 62
Fixed effects:
Estimate Std. Error df t value
(Intercept) 96.55475 3.88489 1106.17909 24.854
expressed_diffhard -71.23335 2.36892 1100.67386 -30.070
act_typemental -12.09856 2.32772 1083.22723 -5.198
act_typerestorative -24.45691 3.51446 1067.81275 -6.959
act_mean_diff -0.06241 0.05474 1048.43401 -1.140
expressed_diffhard:act_typemental 19.13293 3.35515 1105.70114 5.703
expressed_diffhard:act_typerestorative 38.38212 3.35468 1107.53513 11.441
Pr(>|t|)
(Intercept) < 2e-16 ***
expressed_diffhard < 2e-16 ***
act_typemental 2.41e-07 ***
act_typerestorative 5.97e-12 ***
act_mean_diff 0.255
expressed_diffhard:act_typemental 1.51e-08 ***
expressed_diffhard:act_typerestorative < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) exprs_ act_typm act_typr act_m_
exprssd_dff -0.224
act_typmntl -0.396 0.489
act_typrstr -0.857 0.270 0.411
act_men_dff -0.899 -0.082 0.106 0.731
exprssd_dffhrd:ct_typm 0.143 -0.708 -0.684 -0.179 0.075
exprssd_dffhrd:ct_typr 0.170 -0.707 -0.348 -0.459 0.045
exprssd_dffhrd:ct_typm
exprssd_dff
act_typmntl
act_typrstr
act_men_dff
exprssd_dffhrd:ct_typm
exprssd_dffhrd:ct_typr 0.501
model 3 results:
physical
96.6
25.3
mental
84.5
32.4
restorative
72.1
39.2
Effect of expressed diff on energy judgment changes depending on activity type, such that people infer 19.1 points higher energy when someone find mental activities hard, than when they find physical activities hard. People also infer 38.4 points higher energy when someone expresssed restorative activities are hard, than when they say physical activities are hard.
# Exploratory Q3 - does effect of expressed diff on energy judgment change depending on activity type and difficulty level (phys and ment only)?
model4 <- lmer (data= d_task2_after_exclusion_physment, formula= response ~ expressed_diff* act_diff_level + (1 | subjectID))
model5 <- lmer (data= d_task2_after_exclusion_physment, formula= response ~ expressed_diff* act_type + (1 | subjectID))
model6 <- lmer (data= d_task2_after_exclusion_physment, formula= response ~ expressed_diff* act_type* act_diff_level + (1 | subjectID))
anova (model4,model5,model6)
refitting model(s) with ML (instead of REML)
Data: d_task2_after_exclusion_physment
Models:
model5: response ~ expressed_diff * act_type + (1 | subjectID)
model4: response ~ expressed_diff * act_diff_level + (1 | subjectID)
model6: response ~ expressed_diff * act_type * act_diff_level + (1 | subjectID)
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
model5 6 6444.5 6472.2 -3216.2 6432.5
model4 8 6500.1 6537.0 -3242.0 6484.1 0.000 2 1
model6 14 6452.0 6516.6 -3212.0 6424.0 60.054 6 4.388e-11 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot (allEffects (model6)) #model 6's better but no significant interaction term
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: response ~ expressed_diff * act_type * act_diff_level + (1 |
subjectID)
Data: d_task2_after_exclusion_physment
REML criterion at convergence: 6381.5
Scaled residuals:
Min 1Q Median 3Q Max
-3.4626 -0.5914 -0.0311 0.4861 4.5362
Random effects:
Groups Name Variance Std.Dev.
subjectID (Intercept) 27.41 5.236
Residual 315.22 17.754
Number of obs: 744, groups: subjectID, 62
Fixed effects:
Estimate Std. Error
(Intercept) 93.5896 2.1871
expressed_diffhard -72.8821 3.3263
act_typemental -12.4405 3.1148
act_diff_levelmedium -1.5650 3.1420
act_diff_levelhard -1.9630 3.1600
expressed_diffhard:act_typemental 21.2651 4.6515
expressed_diffhard:act_diff_levelmedium 1.2753 4.6510
expressed_diffhard:act_diff_levelhard 3.5285 4.6575
act_typemental:act_diff_levelmedium 2.5669 4.4452
act_typemental:act_diff_levelhard -0.5847 4.4956
expressed_diffhard:act_typemental:act_diff_levelmedium 0.9319 6.5637
expressed_diffhard:act_typemental:act_diff_levelhard -5.9890 6.5249
df t value
(Intercept) 623.0098 42.792
expressed_diffhard 718.6052 -21.911
act_typemental 694.9238 -3.994
act_diff_levelmedium 693.2524 -0.498
act_diff_levelhard 695.1524 -0.621
expressed_diffhard:act_typemental 715.5607 4.572
expressed_diffhard:act_diff_levelmedium 714.2498 0.274
expressed_diffhard:act_diff_levelhard 715.5534 0.758
act_typemental:act_diff_levelmedium 694.2412 0.577
act_typemental:act_diff_levelhard 690.5150 -0.130
expressed_diffhard:act_typemental:act_diff_levelmedium 714.9136 0.142
expressed_diffhard:act_typemental:act_diff_levelhard 708.3715 -0.918
Pr(>|t|)
(Intercept) < 2e-16 ***
expressed_diffhard < 2e-16 ***
act_typemental 7.19e-05 ***
act_diff_levelmedium 0.619
act_diff_levelhard 0.535
expressed_diffhard:act_typemental 5.70e-06 ***
expressed_diffhard:act_diff_levelmedium 0.784
expressed_diffhard:act_diff_levelhard 0.449
act_typemental:act_diff_levelmedium 0.564
act_typemental:act_diff_levelhard 0.897
expressed_diffhard:act_typemental:act_diff_levelmedium 0.887
expressed_diffhard:act_typemental:act_diff_levelhard 0.359
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) exprs_ act_ty act_dff_lvlm
exprssd_dff -0.613
act_typmntl -0.637 0.429
act_dff_lvlm -0.633 0.429 0.443
act_dff_lvlh -0.628 0.424 0.438 0.439
exprssd_d:_ 0.438 -0.714 -0.685 -0.304
exprssd_dffhrd:ct_dff_lvlm 0.440 -0.717 -0.306 -0.690
exprssd_dffhrd:ct_dff_lvlh 0.438 -0.714 -0.304 -0.308
act_typmntl:ct_dff_lvlm 0.446 -0.300 -0.700 -0.705
act_typmntl:ct_dff_lvlh 0.441 -0.298 -0.691 -0.307
exprssd_dffhrd:ct_typmntl:ct_dff_lvlm -0.309 0.505 0.485 0.486
exprssd_dffhrd:ct_typmntl:ct_dff_lvlh -0.313 0.510 0.485 0.218
act_dff_lvlh exp_:_
exprssd_dff
act_typmntl
act_dff_lvlm
act_dff_lvlh
exprssd_d:_ -0.300
exprssd_dffhrd:ct_dff_lvlm -0.306 0.510
exprssd_dffhrd:ct_dff_lvlh -0.693 0.506
act_typmntl:ct_dff_lvlm -0.307 0.479
act_typmntl:ct_dff_lvlh -0.701 0.472
exprssd_dffhrd:ct_typmntl:ct_dff_lvlm 0.212 -0.708
exprssd_dffhrd:ct_typmntl:ct_dff_lvlh 0.492 -0.710
exprssd_dffhrd:ct_dff_lvlm
exprssd_dff
act_typmntl
act_dff_lvlm
act_dff_lvlh
exprssd_d:_
exprssd_dffhrd:ct_dff_lvlm
exprssd_dffhrd:ct_dff_lvlh 0.514
act_typmntl:ct_dff_lvlm 0.485
act_typmntl:ct_dff_lvlh 0.214
exprssd_dffhrd:ct_typmntl:ct_dff_lvlm -0.704
exprssd_dffhrd:ct_typmntl:ct_dff_lvlh -0.365
exprssd_dffhrd:ct_dff_lvlh
exprssd_dff
act_typmntl
act_dff_lvlm
act_dff_lvlh
exprssd_d:_
exprssd_dffhrd:ct_dff_lvlm
exprssd_dffhrd:ct_dff_lvlh
act_typmntl:ct_dff_lvlm 0.213
act_typmntl:ct_dff_lvlh 0.485
exprssd_dffhrd:ct_typmntl:ct_dff_lvlm -0.358
exprssd_dffhrd:ct_typmntl:ct_dff_lvlh -0.711
act_typmntl:ct_dff_lvlm
exprssd_dff
act_typmntl
act_dff_lvlm
act_dff_lvlh
exprssd_d:_
exprssd_dffhrd:ct_dff_lvlm
exprssd_dffhrd:ct_dff_lvlh
act_typmntl:ct_dff_lvlm
act_typmntl:ct_dff_lvlh 0.485
exprssd_dffhrd:ct_typmntl:ct_dff_lvlm -0.692
exprssd_dffhrd:ct_typmntl:ct_dff_lvlh -0.341
act_typmntl:ct_dff_lvlh
exprssd_dff
act_typmntl
act_dff_lvlm
act_dff_lvlh
exprssd_d:_
exprssd_dffhrd:ct_dff_lvlm
exprssd_dffhrd:ct_dff_lvlh
act_typmntl:ct_dff_lvlm
act_typmntl:ct_dff_lvlh
exprssd_dffhrd:ct_typmntl:ct_dff_lvlm -0.335
exprssd_dffhrd:ct_typmntl:ct_dff_lvlh -0.701
exprssd_dffhrd:ct_typmntl:ct_dff_lvlm
exprssd_dff
act_typmntl
act_dff_lvlm
act_dff_lvlh
exprssd_d:_
exprssd_dffhrd:ct_dff_lvlm
exprssd_dffhrd:ct_dff_lvlh
act_typmntl:ct_dff_lvlm
act_typmntl:ct_dff_lvlh
exprssd_dffhrd:ct_typmntl:ct_dff_lvlm
exprssd_dffhrd:ct_typmntl:ct_dff_lvlh 0.504
car:: vif (lm (response ~ expressed_diff* act_type* act_diff, data= d_task2_after_exclusion),type= "predictor" ) #no multicollinearity issue among predictors
GVIFs computed for predictors
GVIF Df GVIF^(1/(2*Df)) Interacts With
expressed_diff 1 11 1 act_type, act_diff
act_type 1 11 1 expressed_diff, act_diff
act_diff 1 11 1 expressed_diff, act_type
Other Predictors
expressed_diff --
act_type --
act_diff --
I don’t know which model to choose.