Exercise 1 (Gather)
library(tidyr)
set.seed(7)
# Data
student_data <- data.frame(
student_id = 1:50,
name = paste0("Student", 1:50),
midterm1 = sample(60:100, 50, replace = TRUE),
midterm2 = sample(60:100, 50, replace = TRUE),
final = sample(60:100, 50, replace = TRUE)
)
# Transform data to long format using gather:
student_data_long <- gather(student_data, key = "exam", value = "score", -student_id, -name)
# Print first few rows:
head(student_data_long)
## student_id name exam score
## 1 1 Student1 midterm1 78
## 2 2 Student2 midterm1 90
## 3 3 Student3 midterm1 87
## 4 4 Student4 midterm1 98
## 5 5 Student5 midterm1 61
## 6 6 Student6 midterm1 74
Exercise 2 (Separate) Part 1
library(tidyr)
# Data
student_data2 <- data.frame(
student_id = 1:50,
name_age = c("John_21", "Alice_20", "Bob_22", "Emily_23", "Michael_22"),
exam_scores = c("midterm1_80,midterm2_85,final_75", "midterm1_75,midterm2_78,final_80", "midterm1_82,midterm2_80,final_85", "midterm1_88,midterm2_90,final_92", "midterm1_85,midterm2_86,final_88")
)
# Split variables name_age to 2 separate columns: name and age
seperated<-separate(student_data2, name_age, c('name', 'age'), sep = '_')
head(seperated)
## student_id name age exam_scores
## 1 1 John 21 midterm1_80,midterm2_85,final_75
## 2 2 Alice 20 midterm1_75,midterm2_78,final_80
## 3 3 Bob 22 midterm1_82,midterm2_80,final_85
## 4 4 Emily 23 midterm1_88,midterm2_90,final_92
## 5 5 Michael 22 midterm1_85,midterm2_86,final_88
## 6 6 John 21 midterm1_80,midterm2_85,final_75
Exercise 2 (Separate) Part 2
# Split variables exam_scores to separate columns for each exam type
seperated_exams<-separate(student_data2, exam_scores, c("midterm1", "midterm2", "final"), sep = ",")
head(seperated_exams)
## student_id name_age midterm1 midterm2 final
## 1 1 John_21 midterm1_80 midterm2_85 final_75
## 2 2 Alice_20 midterm1_75 midterm2_78 final_80
## 3 3 Bob_22 midterm1_82 midterm2_80 final_85
## 4 4 Emily_23 midterm1_88 midterm2_90 final_92
## 5 5 Michael_22 midterm1_85 midterm2_86 final_88
## 6 6 John_21 midterm1_80 midterm2_85 final_75
# Transform student_data2 to long format: data is already in long format
Exercise 3 (Complete)
library(tidyr)
# Data
student_data <- data.frame(
student_id = 1:50,
name_age = c("John_21", "Alice_20", "Bob_22", "Emily_23", "Michael_22"),
exam_scores = c("midterm1_80,midterm2_85,final_75", "midterm1_75,midterm2_78,final_80",
"midterm1_82,midterm2_80,final_85", "midterm1_88,midterm2_90,final_92",
"midterm1_85,midterm2_86,final_88")
)
# Complete missing combinations for students
completed_student_data <- complete(student_data, name_age, exam_scores, fill = list(), explicit = TRUE)
head(completed_student_data)
## # A tibble: 6 × 3
## name_age exam_scores student_id
## <chr> <chr> <int>
## 1 Alice_20 midterm1_75,midterm2_78,final_80 2
## 2 Alice_20 midterm1_75,midterm2_78,final_80 7
## 3 Alice_20 midterm1_75,midterm2_78,final_80 12
## 4 Alice_20 midterm1_75,midterm2_78,final_80 17
## 5 Alice_20 midterm1_75,midterm2_78,final_80 22
## 6 Alice_20 midterm1_75,midterm2_78,final_80 27
Exercise 4 (Spread)
library(tidyr)
# Data
student_data <- data.frame(
student_id = 1:50,
name_age = c("John_21", "Alice_20", "Bob_22", "Emily_23", "Michael_22"),
exam_scores = c("midterm1_80,midterm2_85,final_75", "midterm1_75,midterm2_78,final_80", "midterm1_82,midterm2_80,final_85", "midterm1_88,midterm2_90,final_92", "midterm1_85,midterm2_86,final_88")
)
# Transform data to wide format
student_data_wide<-separate(student_data, exam_scores, into = c("midterm1", "midterm2", "final"), sep = ",")
student_data_wide<-separate(student_data_wide, name_age, into = c('name', 'age'), sep = '_')
head(student_data_wide)
## student_id name age midterm1 midterm2 final
## 1 1 John 21 midterm1_80 midterm2_85 final_75
## 2 2 Alice 20 midterm1_75 midterm2_78 final_80
## 3 3 Bob 22 midterm1_82 midterm2_80 final_85
## 4 4 Emily 23 midterm1_88 midterm2_90 final_92
## 5 5 Michael 22 midterm1_85 midterm2_86 final_88
## 6 6 John 21 midterm1_80 midterm2_85 final_75
Exercise 5 (Unite)
library(tidyr)
# Student and age were already merged, so I changed source code so we can merge them ourselves
student_data <- data.frame(
student_id = 1:50,
name = c("John", "Alice", "Bob", "Emily", "Michael"),
age = c("21", "20", "22", "23", "22"),
exam_scores = c("midterm1_80,midterm2_85,final_75", "midterm1_75,midterm2_78,final_80", "midterm1_82,midterm2_80,final_85", "midterm1_88,midterm2_90,final_92", "midterm1_85,midterm2_86,final_88")
)
# Merge variables name and age to one column called name_age
student_data_united <- unite(student_data, name, age, col="name_age", sep="_")
head(student_data_united)
## student_id name_age exam_scores
## 1 1 John_21 midterm1_80,midterm2_85,final_75
## 2 2 Alice_20 midterm1_75,midterm2_78,final_80
## 3 3 Bob_22 midterm1_82,midterm2_80,final_85
## 4 4 Emily_23 midterm1_88,midterm2_90,final_92
## 5 5 Michael_22 midterm1_85,midterm2_86,final_88
## 6 6 John_21 midterm1_80,midterm2_85,final_75