# Data: one row per student_id, carrying a combined "name_age" label and a
# comma-separated string of "<exam>_<score>" entries.
# NOTE(review): only five distinct students are listed but 50 ids are
# generated, so the five records are repeated in order ten times. The
# repetition is spelled out with rep_len() here; confirm 50 rows is intended.
student_data <- data.frame(
  student_id = seq_len(50L),
  name_age = rep_len(
    c("John_21", "Alice_20", "Bob_22", "Emily_23", "Michael_22"),
    50L
  ),
  exam_scores = rep_len(
    c(
      "midterm1_80,midterm2_85,final_75",
      "midterm1_75,midterm2_78,final_80",
      "midterm1_82,midterm2_80,final_85",
      "midterm1_88,midterm2_90,final_92",
      "midterm1_85,midterm2_86,final_88"
    ),
    50L
  )
)
# Split name_age (e.g. "John_21") at the underscore into two columns, name
# and age; the original name_age column is dropped.
# The call is namespace-qualified because no library(tidyr) is visible in
# this script. The other separate() arguments are left at their defaults
# (remove = TRUE, convert = FALSE, extra = "warn", fill = "warn"), so age
# remains a character column.
student_data_separated <- tidyr::separate(
  student_data,
  col = "name_age",
  into = c("name", "age"),
  sep = "_"
)
# Split exam_scores into a label column and a score column for each exam.
# Each value looks like "midterm1_80,midterm2_85,final_75": a comma separates
# the exams and an underscore separates an exam's label from its score, so
# splitting on either character yields six pieces in label/score order.
# The resulting columns are exam1, score1, exam2, score2, exam3, score3, and
# the original exam_scores column is dropped.
# convert is left at its default (FALSE), so the scores stay character;
# pass convert = TRUE if numeric scores are wanted.
student_data_separated <- tidyr::separate(
  student_data_separated,
  col = "exam_scores",
  into = c("exam1", "score1", "exam2", "score2", "exam3", "score3"),
  sep = "[,_]"
)
# Complete missing combinations for students: add a row for every pairing of
# name and age that does not already occur; the other columns of the added
# rows are NA (fill is left at its default, an empty list).
# The call is namespace-qualified because no library(tidyr) is visible in
# this script.
# NOTE(review): crossing name with age creates pairings that were never
# observed (e.g. "John" with age "20") - confirm this is the intent.
student_data_separated <- tidyr::complete(
  student_data_separated,
  name,
  age
)
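# Transform data to wide format
# NOTE: this step is disabled. `key` and `value` below are placeholders that
# do not name existing columns, and spread() is superseded - use
# tidyr::pivot_wider() if this step is revived.
#student_data_separated_spread <- spread(
#student_data_separated,
#key,
#value,
#fill = NA,
#convert = FALSE,
#drop = TRUE,
#sep = NULL,
#)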
# Merge name and age back into a single column called name_age, joined with
# an underscore (e.g. "John_21"). The remaining unite() arguments are left at
# their defaults (remove = TRUE, na.rm = FALSE), so the separate name and age
# columns are dropped.
# The call is namespace-qualified because no library(tidyr) is visible in
# this script.
student_data_separated <- tidyr::unite(
  student_data_separated,
  col = "name_age",
  name,
  age,
  sep = "_"
)
# Note: the `echo = FALSE` option was added to the code chunk so that the
# R code that generated the plot is not printed.