# Fix the RNG seed so the sample() draws used to build the simulated scores
# further down are reproducible from run to run.
set.seed(31)
# tidyr is part of the core tidyverse, so the library(tidyverse) call below
# attaches it as well; this explicit call is redundant but harmless.
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.3.3
# Attach the core tidyverse packages (the script uses dplyr's mutate(),
# stringr's str_remove() and the %>% pipe in addition to tidyr).
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ purrr     1.0.2
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.4.4     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Data
# Simulated gradebook: one row per student, one column per exam.
# The three sample() calls run in column order, so the drawn scores depend on
# the RNG state at this point in the script.
student_data <- data.frame(
  student_id = 1:50,
  name       = paste0("Student", 1:50),
  midterm1   = sample(60:100, size = 50, replace = TRUE),
  midterm2   = sample(60:100, size = 50, replace = TRUE),
  final      = sample(60:100, size = 50, replace = TRUE)
)

# Wide -> long with gather(): the three exam columns are stacked into an
# `exam` (former column name) / `score` (cell value) pair, while student_id
# and name are repeated for every exam.
student_data_long <- student_data %>%
  gather(key = "exam", value = "score", midterm1, midterm2, final)

# Show the first six rows of the long table.
head(student_data_long, n = 6L)
##   student_id     name     exam score
## 1          1 Student1 midterm1    70
## 2          2 Student2 midterm1    99
## 3          3 Student3 midterm1    85
## 4          4 Student4 midterm1    86
## 5          5 Student5 midterm1    68
## 6          6 Student6 midterm1    70
# Data(SEPARATE)
# Five template records; data.frame() recycles the two length-5 vectors across
# the 50 ids, so each name_age / exam_scores value appears ten times.
student_data2 <- data.frame(
  student_id = 1:50,
  name_age = c("John_21", "Alice_20", "Bob_22", "Emily_23", "Michael_22"),
  exam_scores = c(
    "midterm1_80,midterm2_85,final_75",
    "midterm1_75,midterm2_78,final_80",
    "midterm1_82,midterm2_80,final_85",
    "midterm1_88,midterm2_90,final_92",
    "midterm1_85,midterm2_86,final_88"
  )
)

# Split name_age into two columns: name and age.
# convert = TRUE parses the digit-only age pieces as integers instead of
# leaving them as text; the names stay character.
student_data2 <- separate(
  student_data2, name_age,
  into = c("name", "age"), sep = "_", convert = TRUE
)

# Split exam_scores into one column per exam type. Each piece still carries
# its "<exam>_" label at this point (e.g. "midterm1_80").
student_data2 <- separate(
  student_data2, exam_scores,
  into = c("midterm1", "midterm2", "final"), sep = ","
)

# Strip the label from each piece and store the score as an integer so that
# arithmetic and ordering on the scores behave numerically, not lexically.
student_data2 <- student_data2 %>%
  mutate(
    midterm1 = as.integer(str_remove(midterm1, "midterm1_")),
    midterm2 = as.integer(str_remove(midterm2, "midterm2_")),
    final = as.integer(str_remove(final, "final_"))
  )

# Transform student_data2 to long format: one row per student and exam.
student_data2_long <- gather(
  student_data2,
  key = "exam", value = "score",
  -student_id, -name, -age
)
# Data(COMPLETE)
# Note: this reassigns `student_data`, replacing any earlier object of that
# name with the packed name_age / exam_scores layout.
student_data <- data.frame(
  student_id = 1:50,
  name_age = c("John_21", "Alice_20", "Bob_22", "Emily_23", "Michael_22"),
  exam_scores = c(
    "midterm1_80,midterm2_85,final_75",
    "midterm1_75,midterm2_78,final_80",
    "midterm1_82,midterm2_80,final_85",
    "midterm1_88,midterm2_90,final_92",
    "midterm1_85,midterm2_86,final_88"
  )
)

# Complete missing combinations for students:
#   1. separate_rows() puts each "<exam>_<score>" entry on its own row;
#   2. separate() splits that entry into `exam` and `score`; convert = TRUE
#      parses the score as an integer so it is type-compatible with the
#      numeric fill value below (a numeric fill cannot be cast into a
#      character column);
#   3. complete() guarantees one row per student_id x exam, giving a score of
#      0 to any combination that is absent or NA. With the data above every
#      student already has all three exams, so no rows are added here.
student_data_complete <- student_data %>%
  separate_rows(exam_scores, sep = ",") %>%
  separate(
    exam_scores,
    into = c("exam", "score"), sep = "_", convert = TRUE
  ) %>%
  complete(
    student_id,
    exam = c("midterm1", "midterm2", "final"),
    fill = list(score = 0L),
    explicit = TRUE
  )
# Data(SPREAD)
# Note: this reassigns `student_data` with the packed name_age / exam_scores
# layout used as the starting point for the reshape below.
student_data <- data.frame(
  student_id = 1:50,
  name_age = c("John_21", "Alice_20", "Bob_22", "Emily_23", "Michael_22"),
  exam_scores = c(
    "midterm1_80,midterm2_85,final_75",
    "midterm1_75,midterm2_78,final_80",
    "midterm1_82,midterm2_80,final_85",
    "midterm1_88,midterm2_90,final_92",
    "midterm1_85,midterm2_86,final_88"
  )
)

# Transform data to wide format (one column per exam).
# Instead of splitting exam_scores positionally into six columns and calling
# spread() once per position, first bring the data to one row per
# student x exam and then spread() a single key/value pair. This does not
# depend on every row listing the exams in the same order, and a student with
# a missing exam simply gets NA in that column.
student_data_wide <- student_data %>%
  # convert = TRUE stores age as an integer rather than text.
  separate(name_age, into = c("name", "age"), sep = "_", convert = TRUE) %>%
  separate_rows(exam_scores, sep = ",") %>%
  # convert = TRUE stores the scores as integers; exam labels stay character.
  separate(
    exam_scores,
    into = c("exam", "score"), sep = "_", convert = TRUE
  ) %>%
  # spread() orders the new columns by factor level; using order of first
  # appearance keeps them as midterm1, midterm2, final instead of alphabetical.
  mutate(exam = factor(exam, levels = unique(exam))) %>%
  spread(key = exam, value = score) %>%
  # separate_rows() hands back a tibble; return a plain data.frame like the
  # input.
  as.data.frame()
# Data(UNITE)
# Note: this reassigns `student_data` with the packed name_age / exam_scores
# layout.
student_data <- data.frame(
  student_id = 1:50,
  name_age = c("John_21", "Alice_20", "Bob_22", "Emily_23", "Michael_22"),
  exam_scores = c(
    "midterm1_80,midterm2_85,final_75",
    "midterm1_75,midterm2_78,final_80",
    "midterm1_82,midterm2_80,final_85",
    "midterm1_88,midterm2_90,final_92",
    "midterm1_85,midterm2_86,final_88"
  )
)

# Merge the variables name and age into one column called name_age.
# The source data only has the already-merged column, so it is first split
# into `name` and `age`, and those two columns are then united again with the
# same "_" separator.
student_data_unite <- student_data %>%
  separate(col = name_age, into = c("name", "age"), sep = "_") %>%
  unite(col = "name_age", name, age, sep = "_", remove = TRUE, na.rm = FALSE)