energy_study2_pilot

Data Wrangling

library(purrr)
library(dplyr)
library(tidytable)
library(stringr)
library(ggplot2)

# Folder containing the raw data files; every (non-hidden) file in it is read.
data_path <- "/Users/apple/Downloads/energy_pilot2_raw"
file_paths <- list.files(data_path, full.names = TRUE)

# Read a single raw file and tag each of its rows with the file's base name.
read_raw_file <- function(raw_file_path) {
  raw_trials <- read.csv(raw_file_path)
  mutate(raw_trials, filename = basename(raw_file_path))
}

# creates a joined df with separate column for filename
d <- map_df(file_paths, read_raw_file)

#data cleaning
# Keep only the comprehension-check and task2/task3 trials, then drop every
# participant who answered at least one comprehension check incorrectly.
# NOTE(review): assumes `correct` holds the strings "true"/"false"; if
# read.csv parsed the column as logical this comparison never matches -
# TODO confirm against the raw files.
d_filtered <- d |>
  filter(task %in% c("energy-compcheck", "diff-compcheck", "task2", "task3")) |>
  group_by(mturk_participant_id) |>
  # na.rm = TRUE: a missing `correct` value must not, on its own, turn the
  # condition into NA and silently drop the whole participant
  filter(!any(correct == "false", na.rm = TRUE)) |>
  # remove the grouping so later steps operate on the full table
  ungroup()

# number of distinct participants left after the exclusion step
d_filtered$mturk_participant_id |> unique() |> length()
[1] 27
# make sure responses are numeric before plotting/modelling
d_filtered$response <- as.numeric(d_filtered$response)

# Keyword patterns used to label each stimulus. A stimulus is labelled by the
# first pattern it matches; stimuli matching no keyword get NA.
physical_pattern <- "running|cleaning|hiking|jumping|climbing|swimming"
mental_pattern <- "handwriting|math|violin|homework|flashcards|alphabet"
hard_pattern <- "running|climbing|math|homework"
medium_pattern <- "hiking|jumping|handwriting|alphabet"
easy_pattern <- "cleaning|swimming|violin|flashcards"

#create new labels for activity type (phys vs. mental) and
#difficulty level (hard, medium, & easy)
d_labeled <- d_filtered %>%
  mutate(
    act_type = case_when(
      str_detect(stimulus, physical_pattern) ~ "physical",
      str_detect(stimulus, mental_pattern) ~ "mental",
      # typed NA keeps the column character in every case_when implementation
      TRUE ~ NA_character_
    ),
    act_diff = case_when(
      str_detect(stimulus, hard_pattern) ~ "hard",
      str_detect(stimulus, medium_pattern) ~ "medium",
      str_detect(stimulus, easy_pattern) ~ "easy",
      TRUE ~ NA_character_
    )
  )

# Split the labelled trials into one table per task.
task2 <- filter(d_labeled, task == "task2")
task3 <- filter(d_labeled, task == "task3")

Task 2

Visualization

#length(unique(task2$stimulus))

#create new label for energy level
# "0" = stimulus mentions "extremely tired", "100" = "extremely energetic"
task2 <- task2 %>%
  mutate(energy_level = case_when(
    str_detect(stimulus, "extremely tired") ~ "0",
    str_detect(stimulus, "extremely energetic") ~ "100",
    TRUE ~ NA_character_  # Default case if no keyword is found
  ))

#removing NAs
# if_all (not if_any): a row is kept only when all three labels are present;
# if_any would keep a row as soon as a single label is non-missing.
task2 <- task2 |>
  filter(if_all(c(energy_level, act_type, act_diff), ~ !is.na(.)))

# order the difficulty levels easy < medium < hard for plotting and modelling
task2$act_diff <- factor(task2$act_diff, levels = c("easy", "medium", "hard"))

#plotting
# Box plots of the task2 ratings: activity type on the x axis, activity
# difficulty as fill colour, one panel per energy level.
ggplot(task2, aes(x = act_type, y = response, fill = act_diff)) +
  stat_boxplot(geom = "errorbar") +
  geom_boxplot() +
  facet_wrap(
    ~energy_level,
    labeller = labeller(
      energy_level = c("0" = "Extremely Tired", "100" = "Extremely Energetic")
    )
  ) +
  # stat_summary(fun.y=mean, geom="point", size=1)+
  theme_minimal() +
  scale_fill_brewer(palette = "Oranges") +
  labs(
    title = "Inferred Difficulty Given Energy Level",
    # the x axis shows act_type; energy level is conveyed by the facets
    x = "Activity Type",
    y = "Inferred Difficulty",
    fill = "difficulty level"
  ) +
  theme(plot.title = element_text(size = 15))

Difficulty ratings are clearly different across the two energy levels. In “extremely tired” trials, there also seem to be differences across activity types (mental/physical), as well as across difficulty levels (easy/medium/hard). But the differences are not so prominent in “extremely energetic” trials, where ratings are a lot more variable.

Modeling

# Treat the three predictors as categorical variables.
task2$act_type <- as.factor(task2$act_type)
task2$act_diff <- as.factor(task2$act_diff)
task2$energy_level <- as.factor(task2$energy_level)

# m2.1: energy level as the only predictor, sum-coded.
m2.1 <- lm(
  response ~ energy_level,
  data = task2,
  contrasts = list(energy_level = "contr.sum")
)

# m2.2: full factorial model. Only energy_level and act_type are sum-coded;
# act_diff is not listed in `contrasts`, so it keeps R's default coding.
m2.2 <- lm(
  response ~ energy_level * act_type * act_diff,
  data = task2,
  contrasts = list(energy_level = "contr.sum", act_type = "contr.sum")
)

summary(m2.1)

Call:
lm(formula = response ~ energy_level, data = task2, contrasts = list(energy_level = "contr.sum"))

Residuals:
    Min      1Q  Median      3Q     Max 
-53.938 -16.420  -0.938  11.062  71.580 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)     58.179      1.762   33.02   <2e-16 ***
energy_level1   30.759      1.762   17.46   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 22.43 on 160 degrees of freedom
Multiple R-squared:  0.6557,    Adjusted R-squared:  0.6536 
F-statistic: 304.8 on 1 and 160 DF,  p-value: < 2.2e-16
summary(m2.2)

Call:
lm(formula = response ~ energy_level * act_type * act_diff, data = task2, 
    contrasts = list(energy_level = "contr.sum", act_type = "contr.sum"))

Residuals:
    Min      1Q  Median      3Q     Max 
-53.250 -15.654  -1.267   8.067  65.667 

Coefficients:
                                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)                             54.9333     3.0840  17.812   <2e-16 ***
energy_level1                           29.0583     3.0840   9.422   <2e-16 ***
act_type1                               -1.3583     3.0840  -0.440   0.6602    
act_diffmedium                           2.1583     4.3614   0.495   0.6214    
act_diffhard                             8.0750     4.3614   1.851   0.0661 .  
energy_level1:act_type1                 -2.9000     3.0840  -0.940   0.3486    
energy_level1:act_diffmedium             1.6917     4.3614   0.388   0.6987    
energy_level1:act_diffhard               3.1083     4.3614   0.713   0.4771    
act_type1:act_diffmedium                -2.0583     4.3614  -0.472   0.6377    
act_type1:act_diffhard                   2.0250     4.3614   0.464   0.6431    
energy_level1:act_type1:act_diffmedium   0.5583     4.3614   0.128   0.8983    
energy_level1:act_type1:act_diffhard    -1.0083     4.3614  -0.231   0.8175    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 22.52 on 150 degrees of freedom
Multiple R-squared:  0.6745,    Adjusted R-squared:  0.6506 
F-statistic: 28.25 on 11 and 150 DF,  p-value: < 2.2e-16
AIC(m2.1)
[1] 1471.432
AIC(m2.2)
[1] 1482.367

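Adding act_type, act_diff and their interaction terms increases AIC slightly (1471.4 → 1482.4) and reduces adjusted R-squared (0.6536 → 0.6506), even though multiple R-squared rises slightly (0.6557 → 0.6745) - so the best model for predicting the overall response should be the one with the single predictor energy_level?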

Difficulty ratings are significantly different across energy level conditions - the grand mean for difficulty rating is 58.18, mean(energy_level=100) is 27.42, mean(energy_level=0) is 88.94. It looks like people think doing any activity would be easier when the individual is energetic.

# "extremely tired" subset
tired <- filter(task2, energy_level == "0")

# Move "mental" and "hard" to the first factor level so that, under sum
# coding, act_type1 is the mental term and act_diff1 is the hard term.
tired$act_diff <- relevel(tired$act_diff, ref = "hard")
tired$act_type <- relevel(tired$act_type, ref = "mental")

# Activity type x activity difficulty, both sum-coded.
m2.3 <- lm(
  response ~ act_type * act_diff,
  data = tired,
  contrasts = list(act_type = "contr.sum", act_diff = "contr.sum")
)
summary(m2.3)

Call:
lm(formula = response ~ act_type * act_diff, data = tired, contrasts = list(act_type = "contr.sum", 
    act_diff = "contr.sum"))

Residuals:
    Min      1Q  Median      3Q     Max 
-53.250  -7.083   1.583   8.067  20.267 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)          89.0028     1.5933  55.861  < 2e-16 ***
act_type1            -4.4194     1.5933  -2.774  0.00699 ** 
act_diff1             6.1722     2.2533   2.739  0.00769 ** 
act_diff2            -5.0111     2.2533  -2.224  0.02916 *  
act_type1:act_diff1   1.1778     2.2533   0.523  0.60272    
act_type1:act_diff2   0.1611     2.2533   0.072  0.94319    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 14.25 on 75 degrees of freedom
Multiple R-squared:  0.1816,    Adjusted R-squared:  0.127 
F-statistic: 3.328 on 5 and 75 DF,  p-value: 0.009076
# "extremely energetic" subset
energetic <- filter(task2, energy_level == "100")

# Move "mental" and "hard" to the first factor level so that, under sum
# coding, act_type1 is the mental term and act_diff1 is the hard term.
energetic$act_diff <- relevel(energetic$act_diff, ref = "hard")
energetic$act_type <- relevel(energetic$act_type, ref = "mental")

# Activity type x activity difficulty, both sum-coded.
m2.4 <- lm(
  response ~ act_type * act_diff,
  data = energetic,
  contrasts = list(act_type = "contr.sum", act_diff = "contr.sum")
)
summary(m2.4)

Call:
lm(formula = response ~ act_type * act_diff, data = energetic, 
    contrasts = list(act_type = "contr.sum", act_diff = "contr.sum"))

Residuals:
    Min      1Q  Median      3Q     Max 
-35.417 -24.333  -3.417  14.583  65.667 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)          27.6861     3.1848   8.693 5.71e-13 ***
act_type1             1.6806     3.1848   0.528    0.599    
act_diff1             3.1556     4.5039   0.701    0.486    
act_diff2            -1.8111     4.5039  -0.402    0.689    
act_type1:act_diff1   2.8944     4.5039   0.643    0.522    
act_type1:act_diff2  -0.1389     4.5039  -0.031    0.975    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 28.49 on 75 degrees of freedom
Multiple R-squared:  0.01616,   Adjusted R-squared:  -0.04943 
F-statistic: 0.2464 on 5 and 75 DF,  p-value: 0.9404

In the “extremely tired” condition, there are significant differences across activity types - grand mean for difficulty rating is 89, mean(act_type = mental) = 84.6, mean(act_type = physical) = 93.4. This means people think that when someone’s extremely tired, they tend to find physical activities harder than mental activities. Also, there are significant differences across difficulty levels - mean(act_diff = hard) = 95, mean(act_diff = easy) = 84, so people do judge difficult activities as more difficult and easy activities as easier. No interaction effect was found.

No effect of activity type or difficulty level was found in the “extremely energetic” condition - people think that when someone’s super energetic, they’ll likely find all activities similarly difficult/easy regardless of type and difficulty level.

Task 3

Visualization

#length(unique(task3$stimulus))

#create new label for expressed difficulty
# "hard" = stimulus contains "so hard", "easy" = stimulus contains "so easy"
task3 <- task3 %>%
  mutate(difficulty = case_when(
    str_detect(stimulus, "so hard") ~ "hard",
    str_detect(stimulus, "so easy") ~ "easy",
    TRUE ~ NA_character_  # Default case if no keyword is found
  ))

#removing NAs
# if_all (not if_any): a row is kept only when all three labels are present;
# if_any would keep a row as soon as a single label is non-missing.
task3 <- task3 |>
  filter(if_all(c(difficulty, act_type, act_diff), ~ !is.na(.)))

# order the difficulty levels easy < medium < hard for plotting and modelling
task3$act_diff <- factor(task3$act_diff, levels = c("easy", "medium", "hard"))

#plotting
# Box plots of the task3 ratings: activity type on the x axis, activity
# difficulty as fill colour, one panel per expressed-difficulty statement.
ggplot(task3, aes(x = act_type, y = response, fill = act_diff)) +
  stat_boxplot(geom = "errorbar") +
  geom_boxplot() +
  facet_wrap(
    ~difficulty,
    labeller = labeller(
      difficulty = c(
        "easy" = "That's going to be so easy",
        "hard" = "That's going to be so hard"
      )
    )
  ) +
  # stat_summary(fun.y=mean, geom="point", size=1)+
  theme_minimal() +
  scale_fill_brewer(palette = "Oranges") +
  labs(
    title = "Inferred Energy Level Given Expressed Difficulty",
    # the x axis shows act_type; expressed difficulty is conveyed by the facets
    x = "Activity Type",
    y = "Inferred Energy",
    fill = "difficulty level"
  ) +
  theme(plot.title = element_text(size = 15))

Energy ratings are clearly different across two difficulty levels. In “so easy” trials, there seem to be differences across activity types (mental/ physical), but not so much across difficulty levels. It’s hard to tell if the energy ratings differ across activity type or difficulty level in “so hard” trials.

Modeling

# Treat the three predictors as categorical variables.
task3$act_type <- as.factor(task3$act_type)
task3$act_diff <- as.factor(task3$act_diff)
task3$difficulty <- as.factor(task3$difficulty)

# m3.1: expressed difficulty as the only predictor, sum-coded.
m3.1 <- lm(
  response ~ difficulty,
  data = task3,
  contrasts = list(difficulty = "contr.sum")
)

# m3.2: expressed difficulty x activity type, both sum-coded
# (act_diff is not part of this model).
m3.2 <- lm(
  response ~ difficulty * act_type,
  data = task3,
  contrasts = list(difficulty = "contr.sum", act_type = "contr.sum")
)

summary(m3.1)

Call:
lm(formula = response ~ difficulty, data = task3, contrasts = list(difficulty = "contr.sum"))

Residuals:
    Min      1Q  Median      3Q     Max 
-57.284 -10.284  -0.346  14.420  52.654 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   54.815      1.272   43.09   <2e-16 ***
difficulty1   30.469      1.272   23.95   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 16.19 on 160 degrees of freedom
Multiple R-squared:  0.7819,    Adjusted R-squared:  0.7805 
F-statistic: 573.6 on 1 and 160 DF,  p-value: < 2.2e-16
summary(m3.2)

Call:
lm(formula = response ~ difficulty * act_type, data = task3, 
    contrasts = list(difficulty = "contr.sum", act_type = "contr.sum"))

Residuals:
    Min      1Q  Median      3Q     Max 
-62.095 -10.988   0.357   9.905  53.357 

Coefficients:
                      Estimate Std. Error t value Pr(>|t|)    
(Intercept)             54.736      1.249  43.814   <2e-16 ***
difficulty1             30.363      1.249  24.305   <2e-16 ***
act_type1               -2.863      1.249  -2.292   0.0232 *  
difficulty1:act_type1   -2.133      1.249  -1.708   0.0897 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 15.89 on 158 degrees of freedom
Multiple R-squared:  0.7926,    Adjusted R-squared:  0.7887 
F-statistic: 201.3 on 3 and 158 DF,  p-value: < 2.2e-16
AIC(m3.1)
[1] 1365.93
AIC(m3.2)
[1] 1361.764

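Adding act_type and its interaction with difficulty reduces AIC (1365.93 → 1361.76) and slightly increases adjusted R-squared (0.7805 → 0.7887) - both criteria favor the larger model (m3.2), so here, unlike in Task 2, the single-predictor model with difficulty alone is not the preferred one, although the improvement is small.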

Energy ratings are significantly different across difficulty conditions - the grand mean for energy rating is 54.8, mean(difficulty=easy) = 85.3, mean(difficulty=hard) = 24.35. People think that if it’s easy for someone to do something, they likely have a high energy level, and vice versa.

# "so hard" subset
hard <- filter(task3, difficulty == "hard")

# Move "mental" and "hard" to the first factor level so that, under sum
# coding, act_type1 is the mental term and act_diff1 is the hard term.
hard$act_diff <- relevel(hard$act_diff, ref = "hard")
hard$act_type <- relevel(hard$act_type, ref = "mental")

# Activity type x activity difficulty, both sum-coded.
m3.3 <- lm(
  response ~ act_type * act_diff,
  data = hard,
  contrasts = list(act_type = "contr.sum", act_diff = "contr.sum")
)
summary(m3.3)

Call:
lm(formula = response ~ act_type * act_diff, data = hard, contrasts = list(act_type = "contr.sum", 
    act_diff = "contr.sum"))

Residuals:
    Min      1Q  Median      3Q     Max 
-26.750 -13.667  -0.750   5.333  52.467 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)          24.5833     1.9465  12.629   <2e-16 ***
act_type1            -0.7333     1.9465  -0.377    0.707    
act_diff1             1.8167     2.7528   0.660    0.511    
act_diff2            -1.8750     2.7528  -0.681    0.498    
act_type1:act_diff1  -0.5333     2.7528  -0.194    0.847    
act_type1:act_diff2  -2.3083     2.7528  -0.839    0.404    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 17.41 on 75 degrees of freedom
Multiple R-squared:  0.02623,   Adjusted R-squared:  -0.03869 
F-statistic: 0.404 on 5 and 75 DF,  p-value: 0.8446
# "so easy" subset
easy <- filter(task3, difficulty == "easy")

# Move "mental" and "hard" to the first factor level so that, under sum
# coding, act_type1 is the mental term and act_diff1 is the hard term.
easy$act_diff <- relevel(easy$act_diff, ref = "hard")
easy$act_type <- relevel(easy$act_type, ref = "mental")

# Activity type x activity difficulty, both sum-coded.
m3.4 <- lm(
  response ~ act_type * act_diff,
  data = easy,
  contrasts = list(act_type = "contr.sum", act_diff = "contr.sum")
)
summary(m3.4)

Call:
lm(formula = response ~ act_type * act_diff, data = easy, contrasts = list(act_type = "contr.sum", 
    act_diff = "contr.sum"))

Residuals:
    Min      1Q  Median      3Q     Max 
-60.917  -6.333   4.667  10.200  21.667 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)          84.9750     1.6447  51.667  < 2e-16 ***
act_type1            -5.0417     1.6447  -3.065  0.00302 ** 
act_diff1             0.8583     2.3259   0.369  0.71315    
act_diff2            -1.4083     2.3259  -0.605  0.54668    
act_type1:act_diff1  -2.4583     2.3259  -1.057  0.29394    
act_type1:act_diff2   0.8083     2.3259   0.348  0.72916    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 14.71 on 75 degrees of freedom
Multiple R-squared:  0.1278,    Adjusted R-squared:  0.06969 
F-statistic: 2.199 on 5 and 75 DF,  p-value: 0.06322

In the “so easy” condition, there are significant differences across activity types - grand mean for energy rating is 85, mean(act_type = mental) = 80, mean(act_type = physical) = 90. This means people think that others’ energy level tends to be higher if they find it easy to do a physical activity, as compared to a mental activity. No significant differences across difficulty levels were found - people think that others’ energy levels are high when they find it easy to do activities, regardless of how difficult the activities are.

No effect of activity type or difficulty level was found in the “so hard” condition - people think that when someone finds it extremely hard to do something, their energy level is low regardless of activity type or difficulty level.