CIVENG Data Analysis

# packages
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
library(lsr)
# setup: load the raw CSV and give its columns short analysis names
data <- read.csv("civeng_data.csv")
names(data) <- c("participant", "heart_rate", "blood_o2", "mood")
view(data)
# List the raw mood labels to spot inconsistent spellings before cleaning
unique(data$mood)
 [1] "Fearful"    "Happy "     "Surprised " "Happy"      "Bad "      
 [6] "Fearful "   "Disgusted " "Angry "     "Sad"        "Sad "      
[11] "Bad"       
# Normalise the mood labels: the raw column mixes variants that differ only by
# trailing whitespace (e.g. "Happy " vs "Happy"), so trim the whitespace and
# apply title case so that each mood collapses to a single label.
#
# The result is left ungrouped on purpose. Steps that need per-mood groups call
# group_by() themselves; a persistent grouping on `data` would silently change
# how any later mutate()/summarise()/select() on it behaves.
data <- data |>
  mutate(
    mood = mood |>
      as.character() |>
      str_trim() |>
      str_to_title()
  )
# heart rate data summary
# Per-mood mean, median, standard deviation and number of observations.
# sd() is NA for any mood that has only a single observation.
data |>
  group_by(mood) |>
  summarize(
    mean_hr = mean(heart_rate),
    median_hr = median(heart_rate),
    sd_hr = sd(heart_rate),
    count = n()
  )
# A tibble: 7 × 5
  mood      mean_hr median_hr sd_hr count
  <chr>       <dbl>     <dbl> <dbl> <int>
1 Angry        83.5      83    11.6    14
2 Bad          77.5      77    11.5    23
3 Disgusted    84        84    NA       1
4 Fearful      77.9      75    12.1     9
5 Happy        75.2      72    13.7    28
6 Sad          79        77.5  11.6    12
7 Surprised    83.8      89    15.5    13
# blood oxygen data summary
# Per-mood mean, median, standard deviation and number of observations.
# sd() is NA for any mood that has only a single observation.
data |>
  group_by(mood) |>
  summarize(
    mean_o2 = mean(blood_o2),
    median_o2 = median(blood_o2),
    sd_o2 = sd(blood_o2),
    count = n()
  )
# A tibble: 7 × 5
  mood      mean_o2 median_o2 sd_o2 count
  <chr>       <dbl>     <dbl> <dbl> <int>
1 Angry        97.5        98  1.40    14
2 Bad          96.7        97  1.30    23
3 Disgusted    97          97 NA        1
4 Fearful      97.1        97  1.69     9
5 Happy        97.7        98  1.25    28
6 Sad          96.6        97  1.08    12
7 Surprised    96.8        97  1.52    13
# Heart rate distribution, one panel per mood (10 bins)
ggplot(data, aes(x = heart_rate)) +
  geom_histogram(bins = 10) +
  facet_wrap(vars(mood))

# Blood oxygen distribution, one panel per mood (5 bins), bars filled by mood
ggplot(data, aes(x = blood_o2, fill = mood)) +
  geom_histogram(bins = 5) +
  facet_wrap(vars(mood))

# One-way ANOVA: does mean heart rate differ between moods?
aov_result <- aov(
  heart_rate ~ mood,
  data = data
)
summary(aov_result)
            Df Sum Sq Mean Sq F value Pr(>F)
mood         6   1073   178.8   1.096  0.371
Residuals   93  15171   163.1               
# Effect size (eta squared) for the ANOVA fitted above. Reuse `aov_result`
# rather than refitting, so the effect size always refers to exactly the model
# whose F-test is reported.
etaSquared(aov_result)
       eta.sq eta.sq.part
mood 0.066035    0.066035
# K-means clustering (k = 3) on heart rate and blood oxygen.
# - set.seed(): kmeans() starts from randomly chosen centres, so without a
#   fixed seed the cluster assignments (and the plot colours) change per run.
# - scale(): k-means uses Euclidean distance; heart rate varies by tens of
#   units while blood oxygen varies by about one, so unscaled clustering is
#   driven almost entirely by heart rate. Standardising gives both variables
#   equal weight. Note that kmeans_result$centers is therefore in
#   standardised (z-score) units.
# - nstart: try several random starts and keep the best solution, since a
#   single start can land in a poor local optimum.
set.seed(42)
cluster_features <- scale(data[, c("heart_rate", "blood_o2")])
kmeans_result <- kmeans(cluster_features, centers = 3, nstart = 25)
data$cluster <- kmeans_result$cluster

# Heart rate vs blood oxygen, coloured by k-means cluster, one panel per mood
ggplot(data, aes(x = heart_rate, y = blood_o2, color = factor(cluster))) +
  geom_point() +
  facet_wrap(~ mood) +
  labs(
    title = "K-Means Clustering of Heart Rate and Blood Oxygen",
    x = "Heart Rate",
    y = "Blood Oxygen Level",
    color = "Cluster"
  )

# Linear model of heart rate on mood and blood oxygen.
# mood is converted to a factor first so it enters the model as a categorical
# predictor (one coefficient per non-reference level).
data$mood <- factor(data$mood)
lm_model <- lm(
  heart_rate ~ mood + blood_o2,
  data = data
)
summary(lm_model)

Call:
lm(formula = heart_rate ~ mood + blood_o2, data = data)

Residuals:
     Min       1Q   Median       3Q      Max 
-23.8145 -10.3059  -0.7613  11.3527  24.0213 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)  
(Intercept)    54.7794    96.5174   0.568   0.5717  
moodBad        -5.7720     4.4309  -1.303   0.1959  
moodDisgusted   0.6473    13.2947   0.049   0.9613  
moodFearful    -5.4966     5.4972  -1.000   0.3200  
moodHappy      -8.3740     4.2050  -1.991   0.0494 *
moodSad        -4.2300     5.1301  -0.825   0.4118  
moodSurprised   0.4618     4.9857   0.093   0.9264  
blood_o2        0.2946     0.9893   0.298   0.7666  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 12.84 on 92 degrees of freedom
Multiple R-squared:  0.06693,   Adjusted R-squared:  -0.00406 
F-statistic: 0.9428 on 7 and 92 DF,  p-value: 0.4778
# Heart rate against blood oxygen; points coloured by mood, with a black
# linear-regression line drawn without its confidence band.
ggplot(data, aes(x = blood_o2, y = heart_rate, color = mood)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "black") +
  labs(
    title = "Heart Rate vs Blood Oxygen by Mood",
    x = "Blood Oxygen",
    y = "Heart Rate"
  ) +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

# Box plot of heart rate for each mood, boxes filled by mood
ggplot(data, aes(x = mood, y = heart_rate, fill = mood)) +
  geom_boxplot() +
  labs(
    title = "Heart Rate by Mood",
    x = "Mood",
    y = "Heart Rate"
  ) +
  theme_minimal()

# Blood oxygen against heart rate for all participants, with a blue
# linear-regression line (default confidence band shown).
ggplot(data, aes(x = heart_rate, y = blood_o2)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue") +
  labs(
    title = "Scatter Plot of Heart Rate vs Blood Oxygen",
    x = "Heart Rate",
    y = "Blood Oxygen"
  ) +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'