Sample Code for Dylan

# Point R at the project folder. This path is specific to the author's
# machine: edit it (or drop the line) before running the script elsewhere.
# The path must be wrapped in straight ASCII quotes; typographic "curly"
# quotes pasted from a word processor are a syntax error in R.
setwd("C:/Work Files/Collaboration/Pontus/Israel Hamas Conflict")

Steps to create a longitudinal variable that reflects the time-point-specific deviation from each individual participant's own longitudinal average.

Creating an example dataset:

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# Fix the RNG seed so the simulated scores are identical on every run
set.seed(123)

# Study design: how many participants, and how many waves each one has
num_participants <- 50
num_waves <- 5

# Long-format index columns: every ID is repeated once per wave, and the
# wave counter 1..num_waves cycles within each participant
participant_id <- rep(seq_len(num_participants), each = num_waves)
wave <- rep(seq_len(num_waves), times = num_participants)

# One simulated score per participant-wave row, drawn from a normal
# distribution with mean 50 and standard deviation 10
outcome <- rnorm(n = num_participants * num_waves, mean = 50, sd = 10)

# Assemble the long-format data frame (one row per participant per wave);
# the column names are taken from the vector names
longitudinal_data <- data.frame(participant_id, wave, outcome)

# Print a label, then preview the first ten rows
print("Longitudinal Data Frame:")

## [1] "Longitudinal Data Frame:"

head(longitudinal_data, n = 10)

##    participant_id wave  outcome
## 1               1    1 44.39524
## 2               1    2 47.69823
## 3               1    3 65.58708
## 4               1    4 50.70508
## 5               1    5 51.29288
## 6               2    1 67.15065
## 7               2    2 54.60916
## 8               2    3 37.34939
## 9               2    4 43.13147
## 10              2    5 45.54338

# Spread the long data into one row per participant: each wave's outcome
# becomes its own column named wave_<wave number>
wide_data <- pivot_wider(
  data = longitudinal_data,
  id_cols = participant_id,
  names_from = wave,
  values_from = outcome,
  names_prefix = "wave_"
)

print("Converted Wide Format Data:")

## [1] "Converted Wide Format Data:"

wide_data %>% print()

## # A tibble: 50 × 6
##    participant_id wave_1 wave_2 wave_3 wave_4 wave_5
##             <int>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1              1   44.4   47.7   65.6   50.7   51.3
##  2              2   67.2   54.6   37.3   43.1   45.5
##  3              3   62.2   53.6   54.0   51.1   44.4
##  4              4   67.9   55.0   30.3   57.0   45.3
##  5              5   39.3   47.8   39.7   42.7   43.7
##  6              6   33.1   58.4   51.5   38.6   62.5
##  7              7   54.3   47.0   59.0   58.8   58.2
##  8              8   56.9   55.5   49.4   46.9   46.2
##  9              9   43.1   47.9   37.3   71.7   62.1
## 10             10   38.8   46.0   45.3   57.8   49.2
## # ℹ 40 more rows

Create a within-person mean variable

# Person-level mean: average the five wave columns across each row,
# ignoring any missing waves
wide_data <- wide_data %>%
  mutate(within_mean = rowMeans(pick(wave_1:wave_5), na.rm = TRUE))
wide_data %>% head()

## # A tibble: 6 × 7
##   participant_id wave_1 wave_2 wave_3 wave_4 wave_5 within_mean
##            <int>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>       <dbl>
## 1              1   44.4   47.7   65.6   50.7   51.3        51.9
## 2              2   67.2   54.6   37.3   43.1   45.5        49.6
## 3              3   62.2   53.6   54.0   51.1   44.4        53.1
## 4              4   67.9   55.0   30.3   57.0   45.3        51.1
## 5              5   39.3   47.8   39.7   42.7   43.7        42.7
## 6              6   33.1   58.4   51.5   38.6   62.5        48.8

Create five new variables representing the time-specific deviations from each individual's average

# Person-mean centring: subtract each participant's own mean from every wave
# column, storing the results as deviation_1 ... deviation_5. The expression
# inside .names strips the "wave_" prefix from the source column name so the
# new columns are numbered to match their wave.
wide_data <- wide_data %>%
  mutate(
    across(
      wave_1:wave_5,
      ~ .x - within_mean,
      .names = "deviation_{sub('wave_', '', .col)}"
    )
  )

wide_data %>% head()

## # A tibble: 6 × 12
##   participant_id wave_1 wave_2 wave_3 wave_4 wave_5 within_mean deviation_1
##            <int>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>       <dbl>       <dbl>
## 1              1   44.4   47.7   65.6   50.7   51.3        51.9       -7.54
## 2              2   67.2   54.6   37.3   43.1   45.5        49.6       17.6 
## 3              3   62.2   53.6   54.0   51.1   44.4        53.1        9.16
## 4              4   67.9   55.0   30.3   57.0   45.3        51.1       16.8 
## 5              5   39.3   47.8   39.7   42.7   43.7        42.7       -3.35
## 6              6   33.1   58.4   51.5   38.6   62.5        48.8      -15.7 
## # ℹ 4 more variables: deviation_2 <dbl>, deviation_3 <dbl>, deviation_4 <dbl>,
## #   deviation_5 <dbl>

Convert the data from wide back to long format for plotting with ggplot2: first rename the columns, then restructure the data.

# Give both sets of wave columns a shared "<measure>_wave<k>" naming scheme so
# a single pivot_longer() call can split the names afterwards:
#   wave_k      -> Y1_wavek  (raw outcome)
#   deviation_k -> Y2_wavek  (deviation from the person mean)
# The lookup vector is new_name = "old_name"; any_of() leaves the data
# untouched for any old name that is not present.
wide_data <- wide_data %>%
  rename(any_of(c(
    setNames(paste0("wave_", 1:5), paste0("Y1_wave", 1:5)),
    setNames(paste0("deviation_", 1:5), paste0("Y2_wave", 1:5))
  )))

wide_data %>% print()

## # A tibble: 50 × 12
##    participant_id Y1_wave1 Y1_wave2 Y1_wave3 Y1_wave4 Y1_wave5 within_mean
##             <int>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>       <dbl>
##  1              1     44.4     47.7     65.6     50.7     51.3        51.9
##  2              2     67.2     54.6     37.3     43.1     45.5        49.6
##  3              3     62.2     53.6     54.0     51.1     44.4        53.1
##  4              4     67.9     55.0     30.3     57.0     45.3        51.1
##  5              5     39.3     47.8     39.7     42.7     43.7        42.7
##  6              6     33.1     58.4     51.5     38.6     62.5        48.8
##  7              7     54.3     47.0     59.0     58.8     58.2        55.5
##  8              8     56.9     55.5     49.4     46.9     46.2        51.0
##  9              9     43.1     47.9     37.3     71.7     62.1        52.4
## 10             10     38.8     46.0     45.3     57.8     49.2        47.4
## # ℹ 40 more rows
## # ℹ 5 more variables: Y2_wave1 <dbl>, Y2_wave2 <dbl>, Y2_wave3 <dbl>,
## #   Y2_wave4 <dbl>, Y2_wave5 <dbl>

# Reshape data from wide to long format
long_data <- wide_data %>%
  pivot_longer(
    cols = starts_with("Y"), 
    names_to = c(".value", "wave"),
    names_sep = "_wave"
  )

colnames(long_data)[colnames(long_data)=="Y1"]<-"Outcome_Score"
colnames(long_data)[colnames(long_data)=="Y2"]<-"Deviation_Score"


print("Converted Long Format Data:")

## [1] "Converted Long Format Data:"

print(long_data)

## # A tibble: 250 × 5
##    participant_id within_mean wave  Outcome_Score Deviation_Score
##             <int>       <dbl> <chr>         <dbl>           <dbl>
##  1              1        51.9 1              44.4          -7.54 
##  2              1        51.9 2              47.7          -4.24 
##  3              1        51.9 3              65.6          13.7  
##  4              1        51.9 4              50.7          -1.23 
##  5              1        51.9 5              51.3          -0.643
##  6              2        49.6 1              67.2          17.6  
##  7              2        49.6 2              54.6           5.05 
##  8              2        49.6 3              37.3         -12.2  
##  9              2        49.6 4              43.1          -6.43 
## 10              2        49.6 5              45.5          -4.01 
## # ℹ 240 more rows

# List the column names of the long-format data
colnames(long_data)

## [1] "participant_id"  "within_mean"     "wave"            "Outcome_Score"  
## [5] "Deviation_Score"

# Line graph of the raw outcome scores: one line (with points) per
# participant, coloured by participant ID
long_data %>%
  ggplot(aes(
    x = wave,
    y = Outcome_Score,
    group = participant_id,
    colour = factor(participant_id)
  )) +
  geom_line() +
  geom_point() +
  labs(title = "Outcome Over Time", x = "Wave", y = "Outcome", colour = "ID") +
  theme_minimal()

# Same graph for the deviation scores (outcome minus the person mean)
long_data %>%
  ggplot(aes(
    x = wave,
    y = Deviation_Score,
    group = participant_id,
    colour = factor(participant_id)
  )) +
  geom_line() +
  geom_point() +
  labs(
    title = "Deviation Over Time",
    x = "Wave",
    y = "Deviation",
    colour = "ID"
  ) +
  theme_minimal()

Sample Code for Dylan

Ty

2024-05-22