R course assignment

Author

Marie Lasrado

Published

October 9, 2024

Data import

  1. Importing Excel file from directory.
# Load the experiment spreadsheet, treating its first row as column names.
data <- read_excel(path = "experiment_data.xlsx", col_names = TRUE)

Data cleaning

  1. Change column names
# Give the three imported columns short snake_case names (positional).
data <- setNames(data, c("heart_rate", "stress_level", "treatment"))

  1. Check data structure
# Inspect column types right after import.
str(object = data)
tibble [300 × 3] (S3: tbl_df/tbl/data.frame)
 $ heart_rate  : num [1:300] 63.7 49.7 57.5 53.5 70.7 ...
 $ stress_level: chr [1:300] "4.370861069626263" "9.556428757689245" "7.587945476302646" "6.387926357773329" ...
 $ treatment   : chr [1:300] "Meditation" "Meditation" "Meditation" "Meditation" ...
# stress_level was imported as character; convert it to numeric, then
# re-check the structure to confirm the new type.
mu_data <- data %>%
  mutate(stress_level = as.numeric(stress_level))
str(object = mu_data)
tibble [300 × 3] (S3: tbl_df/tbl/data.frame)
 $ heart_rate  : num [1:300] 63.7 49.7 57.5 53.5 70.7 ...
 $ stress_level: num [1:300] 4.37 9.56 7.59 6.39 2.4 ...
 $ treatment   : chr [1:300] "Meditation" "Meditation" "Meditation" "Meditation" ...
# Quick look at the distribution of each numeric variable.
# (hist() derives its title and axis label from the argument expression,
# so the `mu_data$...` form is kept as-is.)
hist(mu_data$heart_rate)

hist(mu_data$stress_level)

# Number of participants per treatment group.
table(mu_data$treatment)

               Deep Breathing                    Meditation 
                           92                            97 
Progressive Muscle Relaxation 
                           96 

  1. Remove rows with a missing treatment value
# Split the data on whether a treatment label is present. Only `treatment`
# is checked for NA here; the other columns are not filtered.
missing_participants <- mu_data %>%
  filter(is.na(treatment))
Complete_data <- mu_data %>%
  filter(!is.na(treatment))

Data analysis

  1. Find the mean heart rate for each treatment
# Mean heart rate within each treatment group (one row per treatment).
summarise(
  group_by(Complete_data, treatment),
  heart_rate = mean(heart_rate)
)
# A tibble: 3 × 2
  treatment                     heart_rate
  <chr>                              <dbl>
1 Deep Breathing                      74.8
2 Meditation                          61.6
3 Progressive Muscle Relaxation       74.7

  1. Regression
# Model 1: heart rate explained by treatment group alone.
model1 <- lm(formula = heart_rate ~ treatment, data = Complete_data)
summary(object = model1)

Call:
lm(formula = heart_rate ~ treatment, data = Complete_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-20.4547  -3.7000   0.3046   4.1560  15.5804 

Coefficients:
                                        Estimate Std. Error t value Pr(>|t|)
(Intercept)                             74.83386    0.66189 113.061   <2e-16
treatmentMeditation                    -13.19320    0.92391 -14.280   <2e-16
treatmentProgressive Muscle Relaxation  -0.08454    0.92625  -0.091    0.927
                                          
(Intercept)                            ***
treatmentMeditation                    ***
treatmentProgressive Muscle Relaxation    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 6.349 on 282 degrees of freedom
Multiple R-squared:  0.4933,    Adjusted R-squared:  0.4897 
F-statistic: 137.3 on 2 and 282 DF,  p-value: < 2.2e-16
# Model 2: heart rate explained by stress level alone.
model2 <- lm(formula = heart_rate ~ stress_level, data = Complete_data)
summary(object = model2)

Call:
lm(formula = heart_rate ~ stress_level, data = Complete_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-26.364  -5.048   1.298   6.320  20.180 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)   74.3593     1.1901  62.481   <2e-16 ***
stress_level  -0.7414     0.1967  -3.769    2e-04 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 8.688 on 283 degrees of freedom
Multiple R-squared:  0.04779,   Adjusted R-squared:  0.04443 
F-statistic:  14.2 on 1 and 283 DF,  p-value: 0.0001996
# Model 3: treatment and stress level entered together as main effects.
# Despite the object's name, the formula is additive; it contains no
# treatment:stress_level interaction term.
model_interact <- lm(
  formula = heart_rate ~ treatment + stress_level,
  data = Complete_data
)
summary(object = model_interact)

Call:
lm(formula = heart_rate ~ treatment + stress_level, data = Complete_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-17.341  -3.965   0.476   3.527  16.212 

Coefficients:
                                        Estimate Std. Error t value Pr(>|t|)
(Intercept)                             79.84018    0.96374  82.844  < 2e-16
treatmentMeditation                    -13.48936    0.85973 -15.690  < 2e-16
treatmentProgressive Muscle Relaxation  -0.01579    0.86084  -0.018    0.985
stress_level                            -0.90356    0.13391  -6.748 8.56e-11
                                          
(Intercept)                            ***
treatmentMeditation                    ***
treatmentProgressive Muscle Relaxation    
stress_level                           ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 5.9 on 281 degrees of freedom
Multiple R-squared:  0.5639,    Adjusted R-squared:  0.5593 
F-statistic: 121.1 on 3 and 281 DF,  p-value: < 2.2e-16

# Comparing model fits with sequential F tests.
# NOTE(review): anova() tests each model against the one listed before it and
# is intended for a nested sequence. model1 (treatment only) and model2
# (stress_level only) are not nested, so the model1 -> model2 step (Df = -1)
# is not a meaningful test. Consider anova(model1, model_interact) and
# anova(model2, model_interact) instead.

anova(model1, model2, model_interact)
Analysis of Variance Table

Model 1: heart_rate ~ treatment
Model 2: heart_rate ~ stress_level
Model 3: heart_rate ~ treatment + stress_level
  Res.Df     RSS Df Sum of Sq      F    Pr(>F)    
1    282 11366.1                                  
2    283 21359.1 -1     -9993 287.09 < 2.2e-16 ***
3    281  9781.2  2     11578 166.31 < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Plotting

  1. Plotting
# Scatter plot of the two variables used in model2, with a loess smoother
# and its confidence band.
# The plot is built from the data frame rather than the fitted lm object:
# passing an lm to ggplot() goes through fortify(), which the ggplot2
# documentation recommends against. The plotted points are the same rows
# that were used to fit the model.
ggplot(Complete_data, aes(y = stress_level, x = heart_rate)) +
  geom_point() +
  geom_smooth(method = "loess", se = TRUE)
`geom_smooth()` using formula = 'y ~ x'

# Same scatter plot with a loess smoother, drawn separately for each
# treatment group.
# The plot is built from the data frame rather than the fitted lm object:
# passing an lm to ggplot() goes through fortify(), which the ggplot2
# documentation recommends against. The plotted points are the same rows
# that were used to fit the model.
ggplot(Complete_data, aes(x = heart_rate, y = stress_level)) +
  geom_point(alpha = 0.8) +
  geom_smooth(method = "loess", se = TRUE) +
  facet_wrap(~treatment)
`geom_smooth()` using formula = 'y ~ x'