Data wrangling

2024-03-19

# Seed the RNG. No random-number call appears in the visible code; the seed is
# kept in case later code depends on it.
set.seed(9)
library(tidyr)

# Data ----
# Each of the 5-element vectors below is recycled by data.frame() to fill the
# 50 rows defined by student_id, so the same five students repeat ten times.
student_data <- data.frame(
  student_id = 1:50,
  name_age = c("John_21", "Alice_20", "Bob_22", "Emily_23", "Michael_22"),
  exam_scores = c(
    "midterm1_80,midterm2_85,final_75",
    "midterm1_75,midterm2_78,final_80",
    "midterm1_82,midterm2_80,final_85",
    "midterm1_88,midterm2_90,final_92",
    "midterm1_85,midterm2_86,final_88"
  )
)

# Transform data to wide format ----

# "John_21" -> name = "John", age = "21".
student_data <- separate(
  student_data,
  name_age,
  into = c("name", "age"),
  sep = "_"
)

# One row per (student, exam): split the comma-separated list into rows ...
student_data <- separate_rows(student_data, exam_scores, sep = ",")

# ... then split each "exam_score" token into its two parts.
student_data <- separate(
  student_data,
  exam_scores,
  into = c("exam", "score"),
  sep = "_"
)

# One column per exam. pivot_wider() replaces the superseded spread();
# names_sort = TRUE keeps the exam columns in alphabetical order
# (final, midterm1, midterm2).
# NOTE: age and the score columns are still character here; convert them
# (e.g. with as.integer()) before doing arithmetic on them.
wide_student_data <- pivot_wider(
  student_data,
  names_from = exam,
  values_from = score,
  names_sort = TRUE
)

print(wide_student_data)
## # A tibble: 50 × 6
##    student_id name    age   final midterm1 midterm2
##         <int> <chr>   <chr> <chr> <chr>    <chr>   
##  1          1 John    21    75    80       85      
##  2          2 Alice   20    80    75       78      
##  3          3 Bob     22    85    82       80      
##  4          4 Emily   23    92    88       90      
##  5          5 Michael 22    88    85       86      
##  6          6 John    21    75    80       85      
##  7          7 Alice   20    80    75       78      
##  8          8 Bob     22    85    82       80      
##  9          9 Emily   23    92    88       90      
## 10         10 Michael 22    88    85       86      
## # ℹ 40 more rows