# Point the R session at the project folder. The path must be delimited with
# plain ASCII double quotes; typographic (curly) quotes are a syntax error in R.
# NOTE(review): this is an absolute, machine-specific path, so the script will
# fail on any computer without this folder -- consider an RStudio project or
# relative paths instead.
setwd("C:/Work Files/Collaboration/Pontus/Israel Hamas Conflict")
Steps to create a longitudinal variable that reflects the time-point-specific deviation from each participant's own longitudinal average.
Creating an example dataset:
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Fix the random-number generator state so the simulated data are reproducible
set.seed(123)
# Study design: how many participants, and how many waves each one is measured at
num_participants <- 50
num_waves <- 5
# Participant IDs: every ID is repeated once per wave (1,1,1,1,1,2,2,...)
participant_id <- rep(seq_len(num_participants), each = num_waves)
# Wave index: cycles 1..num_waves within each participant (1,2,3,4,5,1,2,...)
wave <- rep(seq_len(num_waves), times = num_participants)
# Simulated outcome: one independent draw per participant-wave row from a
# normal distribution with mean 50 and standard deviation 10
outcome <- rnorm(length(participant_id), mean = 50, sd = 10)
# Assemble the long-format data frame (one row per participant per wave);
# column names are taken from the variable names
longitudinal_data <- data.frame(participant_id, wave, outcome)
# Preview the first ten rows
print("Longitudinal Data Frame:")
## [1] "Longitudinal Data Frame:"
head(longitudinal_data, n = 10)
## participant_id wave outcome
## 1 1 1 44.39524
## 2 1 2 47.69823
## 3 1 3 65.58708
## 4 1 4 50.70508
## 5 1 5 51.29288
## 6 2 1 67.15065
## 7 2 2 54.60916
## 8 2 3 37.34939
## 9 2 4 43.13147
## 10 2 5 45.54338
# Long -> wide: the values of `wave` become column names prefixed with "wave_"
# (wave_1, wave_2, ...), each holding the outcome measured at that wave, so
# every participant ends up on a single row
wide_data <- pivot_wider(
  longitudinal_data,
  names_from = wave,
  values_from = outcome,
  names_prefix = "wave_"
)
print("Converted Wide Format Data:")
## [1] "Converted Wide Format Data:"
print(wide_data)
## # A tibble: 50 × 6
## participant_id wave_1 wave_2 wave_3 wave_4 wave_5
## <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 44.4 47.7 65.6 50.7 51.3
## 2 2 67.2 54.6 37.3 43.1 45.5
## 3 3 62.2 53.6 54.0 51.1 44.4
## 4 4 67.9 55.0 30.3 57.0 45.3
## 5 5 39.3 47.8 39.7 42.7 43.7
## 6 6 33.1 58.4 51.5 38.6 62.5
## 7 7 54.3 47.0 59.0 58.8 58.2
## 8 8 56.9 55.5 49.4 46.9 46.2
## 9 9 43.1 47.9 37.3 71.7 62.1
## 10 10 38.8 46.0 45.3 57.8 49.2
## # ℹ 40 more rows
Create a within-person mean variable
# Within-person mean: average each participant's outcome across all wave_*
# columns. Selecting by prefix (rather than the fixed range wave_1:wave_5)
# keeps this correct if the number of waves changes. na.rm = TRUE averages
# over the waves that are present when some are missing.
wide_data <- wide_data %>%
  mutate(within_mean = rowMeans(pick(starts_with("wave_")), na.rm = TRUE))
head(wide_data)
## # A tibble: 6 × 7
## participant_id wave_1 wave_2 wave_3 wave_4 wave_5 within_mean
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 44.4 47.7 65.6 50.7 51.3 51.9
## 2 2 67.2 54.6 37.3 43.1 45.5 49.6
## 3 3 62.2 53.6 54.0 51.1 44.4 53.1
## 4 4 67.9 55.0 30.3 57.0 45.3 51.1
## 5 5 39.3 47.8 39.7 42.7 43.7 42.7
## 6 6 33.1 58.4 51.5 38.6 62.5 48.8
Create five new variables representing the time-specific deviation from each individual's own average
# Time-specific deviation from each participant's own mean: for every wave_<k>
# column, add a matching deviation_<k> column equal to wave_<k> - within_mean.
# Looping over the wave columns replaces one hand-written line per wave and
# adapts automatically to the number of waves in the data.
wave_cols <- grep("^wave_", names(wide_data), value = TRUE)
for (wave_col in wave_cols) {
  deviation_col <- sub("^wave_", "deviation_", wave_col)
  wide_data[[deviation_col]] <- wide_data[[wave_col]] - wide_data$within_mean
}
head(wide_data)
## # A tibble: 6 × 12
## participant_id wave_1 wave_2 wave_3 wave_4 wave_5 within_mean deviation_1
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 44.4 47.7 65.6 50.7 51.3 51.9 -7.54
## 2 2 67.2 54.6 37.3 43.1 45.5 49.6 17.6
## 3 3 62.2 53.6 54.0 51.1 44.4 53.1 9.16
## 4 4 67.9 55.0 30.3 57.0 45.3 51.1 16.8
## 5 5 39.3 47.8 39.7 42.7 43.7 42.7 -3.35
## 6 6 33.1 58.4 51.5 38.6 62.5 48.8 -15.7
## # ℹ 4 more variables: deviation_2 <dbl>, deviation_3 <dbl>, deviation_4 <dbl>,
## # deviation_5 <dbl>
Convert the data from wide back to long format for ggplot: first rename the columns, then restructure.
# Give the two measures a shared naming scheme, <measure>_wave<k>, so they can
# be pivoted to long format together:
#   wave_<k>      -> Y1_wave<k>  (raw outcome score)
#   deviation_<k> -> Y2_wave<k>  (deviation from the within-person mean)
# The anchored patterns only touch columns that are exactly "wave_<digits>" or
# "deviation_<digits>", so participant_id and within_mean are left alone.
names(wide_data) <- sub("^wave_([0-9]+)$", "Y1_wave\\1", names(wide_data))
names(wide_data) <- sub("^deviation_([0-9]+)$", "Y2_wave\\1", names(wide_data))
print(wide_data)
## # A tibble: 50 × 12
## participant_id Y1_wave1 Y1_wave2 Y1_wave3 Y1_wave4 Y1_wave5 within_mean
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 44.4 47.7 65.6 50.7 51.3 51.9
## 2 2 67.2 54.6 37.3 43.1 45.5 49.6
## 3 3 62.2 53.6 54.0 51.1 44.4 53.1
## 4 4 67.9 55.0 30.3 57.0 45.3 51.1
## 5 5 39.3 47.8 39.7 42.7 43.7 42.7
## 6 6 33.1 58.4 51.5 38.6 62.5 48.8
## 7 7 54.3 47.0 59.0 58.8 58.2 55.5
## 8 8 56.9 55.5 49.4 46.9 46.2 51.0
## 9 9 43.1 47.9 37.3 71.7 62.1 52.4
## 10 10 38.8 46.0 45.3 57.8 49.2 47.4
## # ℹ 40 more rows
## # ℹ 5 more variables: Y2_wave1 <dbl>, Y2_wave2 <dbl>, Y2_wave3 <dbl>,
## # Y2_wave4 <dbl>, Y2_wave5 <dbl>
# Wide -> long: split each "Y<m>_wave<k>" column name at "_wave". The part
# before it (".value") becomes an output column (Y1, Y2) and the part after it
# becomes the `wave` value, giving one row per participant per wave.
# names_transform converts `wave` back to integer; without it the wave number
# is left as character, which would sort as "1", "10", "2", ... with ten or
# more waves.
long_data <- wide_data %>%
  pivot_longer(
    cols = starts_with("Y"),
    names_to = c(".value", "wave"),
    names_sep = "_wave",
    names_transform = list(wave = as.integer)
  ) %>%
  # Replace the temporary Y1/Y2 labels with descriptive column names
  rename(Outcome_Score = Y1, Deviation_Score = Y2)
print("Converted Long Format Data:")
## [1] "Converted Long Format Data:"
print(long_data)
## # A tibble: 250 × 5
## participant_id within_mean wave Outcome_Score Deviation_Score
## <int> <dbl> <chr> <dbl> <dbl>
## 1 1 51.9 1 44.4 -7.54
## 2 1 51.9 2 47.7 -4.24
## 3 1 51.9 3 65.6 13.7
## 4 1 51.9 4 50.7 -1.23
## 5 1 51.9 5 51.3 -0.643
## 6 2 49.6 1 67.2 17.6
## 7 2 49.6 2 54.6 5.05
## 8 2 49.6 3 37.3 -12.2
## 9 2 49.6 4 43.1 -6.43
## 10 2 49.6 5 45.5 -4.01
## # ℹ 240 more rows
# List the column names of the long-format data to confirm the renaming
names(long_data)
## [1] "participant_id" "within_mean" "wave" "Outcome_Score"
## [5] "Deviation_Score"
# Draw one line (with points) per participant for a chosen score column.
#
# Arguments:
#   data    Long-format data containing `wave`, `participant_id` and the
#           column named by `y_var`.
#   y_var   Name (character string) of the column plotted on the y-axis.
#   title   Plot title.
#   y_label Label for the y-axis.
#
# Returns a ggplot object; calling the function at top level displays the plot.
plot_trajectories <- function(data, y_var, title, y_label) {
  ggplot(
    data,
    aes(
      x = wave,
      # .data[[...]] looks the column up by its name given as a string
      y = .data[[y_var]],
      # group by participant so each person gets their own connected line
      group = participant_id,
      color = as.factor(participant_id)
    )
  ) +
    geom_line() +
    geom_point() +
    labs(title = title, x = "Wave", y = y_label, color = "ID") +
    theme_minimal()
}

# Raw outcome scores over time
plot_trajectories(long_data, "Outcome_Score", "Outcome Over Time", "Outcome")
# Deviations from each participant's own mean over time
plot_trajectories(
  long_data, "Deviation_Score", "Deviation Over Time", "Deviation"
)