set.seed(31)
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.3.3
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ purrr 1.0.2
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.4.4 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Data
# Simulated gradebook: 50 students, each with three exam scores sampled with
# replacement from the integers 60 to 100. The three sample() calls run in
# column order, so the seeded random stream decides which score lands where.
student_data <- data.frame(
  student_id = seq_len(50),
  name = paste0("Student", seq_len(50)),
  midterm1 = sample(60:100, size = 50, replace = TRUE),
  midterm2 = sample(60:100, size = 50, replace = TRUE),
  final = sample(60:100, size = 50, replace = TRUE)
)

# Transform data to long format using gather: the three exam columns are
# stacked into an `exam` (column name) / `score` (cell value) pair, while
# student_id and name are repeated for every exam.
student_data_long <- student_data %>%
  gather(key = "exam", value = "score", midterm1:final)

# Print first few rows:
head(student_data_long)
## student_id name exam score
## 1 1 Student1 midterm1 70
## 2 2 Student2 midterm1 99
## 3 3 Student3 midterm1 85
## 4 4 Student4 midterm1 86
## 5 5 Student5 midterm1 68
## 6 6 Student6 midterm1 70
# Data(SEPARATE)
# Only five distinct name/age and exam-score strings are supplied; they are
# repeated to fill all 50 student ids (the same result data.frame() produces
# by recycling the shorter vectors).
student_data2 <- data.frame(
  student_id = 1:50,
  name_age = rep(
    c("John_21", "Alice_20", "Bob_22", "Emily_23", "Michael_22"),
    length.out = 50
  ),
  exam_scores = rep(
    c(
      "midterm1_80,midterm2_85,final_75",
      "midterm1_75,midterm2_78,final_80",
      "midterm1_82,midterm2_80,final_85",
      "midterm1_88,midterm2_90,final_92",
      "midterm1_85,midterm2_86,final_88"
    ),
    length.out = 50
  )
)

# Split name_age into two columns (name, age), split exam_scores into one
# column per exam, then strip the "<exam>_" prefix so only the score text
# remains. All resulting columns are still character.
student_data2 <- student_data2 %>%
  separate(name_age, into = c("name", "age"), sep = "_") %>%
  separate(
    exam_scores,
    into = c("midterm1", "midterm2", "final"),
    sep = ","
  ) %>%
  mutate(
    midterm1 = str_remove(midterm1, "midterm1_"),
    midterm2 = str_remove(midterm2, "midterm2_"),
    final = str_remove(final, "final_")
  )

# Transform student_data2 to long format: one row per student and exam.
student_data2_long <- student_data2 %>%
  gather(key = "exam", value = "score", midterm1, midterm2, final)
# Data(COMPLETE)
# Only five distinct name/age and exam-score strings are supplied; they are
# repeated to fill all 50 student ids (the same result data.frame() produces
# by recycling the shorter vectors).
student_data <- data.frame(
  student_id = 1:50,
  name_age = rep(
    c("John_21", "Alice_20", "Bob_22", "Emily_23", "Michael_22"),
    length.out = 50
  ),
  exam_scores = rep(
    c(
      "midterm1_80,midterm2_85,final_75",
      "midterm1_75,midterm2_78,final_80",
      "midterm1_82,midterm2_80,final_85",
      "midterm1_88,midterm2_90,final_92",
      "midterm1_85,midterm2_86,final_88"
    ),
    length.out = 50
  )
)

# Complete missing combinations for students.
# Steps: one row per "exam_score" entry, then split each entry into `exam`
# and `score`, then make sure every student has a row for every exam.
# `convert = TRUE` turns `score` into a number. Without it `score` stays
# character, and the numeric `fill` below cannot be cast into that column:
# complete() would raise a type error as soon as a student/exam combination
# really is missing (it only went unnoticed because nothing is missing here).
student_data_complete <- student_data %>%
  separate_rows(exam_scores, sep = ",") %>%
  separate(
    exam_scores,
    into = c("exam", "score"),
    sep = "_",
    convert = TRUE
  ) %>%
  complete(
    student_id,
    exam = c("midterm1", "midterm2", "final"),
    fill = list(score = 0L),
    explicit = TRUE
  )
# Data(SPREAD)
# Only five distinct name/age and exam-score strings are supplied; they are
# repeated to fill all 50 student ids (the same result data.frame() produces
# by recycling the shorter vectors).
student_data <- data.frame(
  student_id = 1:50,
  name_age = rep(
    c("John_21", "Alice_20", "Bob_22", "Emily_23", "Michael_22"),
    length.out = 50
  ),
  exam_scores = rep(
    c(
      "midterm1_80,midterm2_85,final_75",
      "midterm1_75,midterm2_78,final_80",
      "midterm1_82,midterm2_80,final_85",
      "midterm1_88,midterm2_90,final_92",
      "midterm1_85,midterm2_86,final_88"
    ),
    length.out = 50
  )
)

# Transform data to wide format: one row per student, one column per exam.
# The exam strings are first put in long form (one row per student and exam)
# and then spread once, with the exam name as the key. Compared with cutting
# the string into six positional columns and spreading three times, this does
# not depend on every student listing the same exams in the same order.
# `exam` is made a factor whose levels follow order of appearance so the new
# columns come out as midterm1, midterm2, final rather than alphabetically.
# Scores are left as character, as before.
student_data_wide <- student_data %>%
  separate(name_age, into = c("name", "age"), sep = "_") %>%
  separate_rows(exam_scores, sep = ",") %>%
  separate(exam_scores, into = c("exam", "score"), sep = "_") %>%
  mutate(exam = factor(exam, levels = unique(exam))) %>%
  spread(key = exam, value = score)
# Data(UNITE)
# Only five distinct name/age and exam-score strings are supplied; they are
# repeated to fill all 50 student ids (the same result data.frame() produces
# by recycling the shorter vectors).
student_data <- data.frame(
  student_id = 1:50,
  name_age = rep(
    c("John_21", "Alice_20", "Bob_22", "Emily_23", "Michael_22"),
    length.out = 50
  ),
  exam_scores = rep(
    c(
      "midterm1_80,midterm2_85,final_75",
      "midterm1_75,midterm2_78,final_80",
      "midterm1_82,midterm2_80,final_85",
      "midterm1_88,midterm2_90,final_92",
      "midterm1_85,midterm2_86,final_88"
    ),
    length.out = 50
  )
)

# Merge the variables name and age into one column called name_age.
# The data starts with them already combined, so they are separated first
# and then united again with "_" as the separator (the input columns are
# dropped and NA values are kept, which are unite()'s defaults).
student_data_unite <- student_data %>%
  separate(name_age, into = c("name", "age"), sep = "_") %>%
  unite(col = "name_age", name, age, sep = "_")