Data wrangling
2024-03-19
# Fix the random-number seed so any random draws made after this point are reproducible.
set.seed(seed = 9)
library(tidyr)
# Example data: 50 students, built by repeating five name/age and exam-score
# templates ten times each. Both text columns hold several values packed into
# a single string.
student_data <- data.frame(
  student_id = seq_len(50),
  # "<name>_<age>" pairs
  name_age = rep(
    c("John_21", "Alice_20", "Bob_22", "Emily_23", "Michael_22"),
    times = 10
  ),
  # Comma-separated "<exam>_<score>" entries, one string per student
  exam_scores = rep(
    c(
      "midterm1_80,midterm2_85,final_75",
      "midterm1_75,midterm2_78,final_80",
      "midterm1_82,midterm2_80,final_85",
      "midterm1_88,midterm2_90,final_92",
      "midterm1_85,midterm2_86,final_88"
    ),
    times = 10
  )
)
# Transform data to wide format: unpack the combined string columns, then
# spread the exams out so each student has one row and one column per exam.

# Split "<name>_<age>" into separate name and age columns.
student_data <- separate(
  student_data, name_age,
  into = c("name", "age"), sep = "_"
)
# Break the comma-separated exam list into one row per student/exam pair.
student_data <- separate_rows(student_data, exam_scores, sep = ",")
# Split each "<exam>_<score>" entry into an exam label and its score.
student_data <- separate(
  student_data, exam_scores,
  into = c("exam", "score"), sep = "_"
)
# Widen to one column per exam. pivot_wider() replaces the superseded spread();
# names_sort = TRUE keeps the alphabetical column order spread() produced
# (final, midterm1, midterm2). Age and scores remain character values because
# they were split out of strings and are not converted here.
wide_student_data <- pivot_wider(
  student_data,
  names_from = exam,
  values_from = score,
  names_sort = TRUE
)
print(wide_student_data)
## # A tibble: 50 × 6
## student_id name age final midterm1 midterm2
## <int> <chr> <chr> <chr> <chr> <chr>
## 1 1 John 21 75 80 85
## 2 2 Alice 20 80 75 78
## 3 3 Bob 22 85 82 80
## 4 4 Emily 23 92 88 90
## 5 5 Michael 22 88 85 86
## 6 6 John 21 75 80 85
## 7 7 Alice 20 80 75 78
## 8 8 Bob 22 85 82 80
## 9 9 Emily 23 92 88 90
## 10 10 Michael 22 88 85 86
## # ℹ 40 more rows