TASKS
FIRST TASK
# Simulate exam results for 50 students. The seed is fixed so the sampled
# scores are reproducible; the three sample() calls run in the order
# midterm1, midterm2, final.
set.seed(1)
student_data <- data.frame(
  student_id = seq_len(50),
  name = paste0("Student", seq_len(50)),
  midterm1 = sample(60:100, size = 50, replace = TRUE),
  midterm2 = sample(60:100, size = 50, replace = TRUE),
  final = sample(60:100, size = 50, replace = TRUE)
)

# Reshape to long format with pivot_longer(): one row per student and exam,
# with the exam name in `exams` and the score in `points`.
student_data_long <- pivot_longer(
  student_data,
  cols = c("midterm1", "midterm2", "final"),
  names_to = "exams",
  values_to = "points"
)

# Show the first six rows.
head(student_data_long)
## # A tibble: 6 × 4
## student_id name exams points
## <int> <chr> <chr> <int>
## 1 1 Student1 midterm1 63
## 2 1 Student1 midterm2 82
## 3 1 Student1 final 60
## 4 2 Student2 midterm1 98
## 5 2 Student2 midterm2 65
## 6 2 Student2 final 88
SECOND TASK
# The seed is set for this task, although nothing in this block draws
# random numbers.
set.seed(2)

# Raw data: the five name_age / exam_scores values are recycled by
# data.frame() across the 50 student ids.
student_data2 <- data.frame(
  student_id = seq_len(50),
  name_age = c("John_21", "Alice_20", "Bob_22", "Emily_23", "Michael_22"),
  exam_scores = c(
    "midterm1_80,midterm2_85,final_75",
    "midterm1_75,midterm2_78,final_80",
    "midterm1_82,midterm2_80,final_85",
    "midterm1_88,midterm2_90,final_92",
    "midterm1_85,midterm2_86,final_88"
  )
)

# Split name_age into `name` and `age`, then split exam_scores into one
# column per exam type.
student_data2 <- student_data2 %>%
  separate(col = "name_age", into = c("name", "age"), sep = "_") %>%
  separate(
    col = "exam_scores",
    into = c("midterm1", "midterm2", "final"),
    sep = ","
  )

# Strip the "<exam>_" prefix from every exam column so only the score text
# remains.
# NOTE(review): the scores (and age) stay character after this step; wrap in
# as.integer() if numeric values are needed downstream.
student_data2[c("midterm1", "midterm2", "final")] <- lapply(
  c("midterm1", "midterm2", "final"),
  function(exam) gsub(paste0(exam, "_"), "", student_data2[[exam]])
)

# Reshape student_data2 to long format: one row per student and exam.
student_data2_long <- pivot_longer(
  student_data2,
  cols = c("midterm1", "midterm2", "final"),
  names_to = "exams",
  values_to = "points"
)
head(student_data2_long)
## # A tibble: 6 × 5
## student_id name age exams points
## <int> <chr> <chr> <chr> <chr>
## 1 1 John 21 midterm1 80
## 2 1 John 21 midterm2 85
## 3 1 John 21 final 75
## 4 2 Alice 20 midterm1 75
## 5 2 Alice 20 midterm2 78
## 6 2 Alice 20 final 80
FOURTH TASK
# Reshape the long table back to wide format: each distinct value of `exams`
# becomes its own column, filled with the matching `points`.
student_data2_wide <- pivot_wider(
  student_data2_long,
  names_from = "exams",
  values_from = "points"
)
head(student_data2_wide)
## # A tibble: 6 × 6
## student_id name age midterm1 midterm2 final
## <int> <chr> <chr> <chr> <chr> <chr>
## 1 1 John 21 80 85 75
## 2 2 Alice 20 75 78 80
## 3 3 Bob 22 82 80 85
## 4 4 Emily 23 88 90 92
## 5 5 Michael 22 85 86 88
## 6 6 John 21 80 85 75
FIFTH TASK
# Merge the `name` and `age` columns into a single column called `name_age`.
# The new column uses an underscore so that it is a syntactic R name and
# matches the column name the task asks for; the values themselves are still
# joined with "-" (e.g. "John-21"). unite() drops the two input columns.
student_data2_merge <- student_data2 %>%
  unite(col = "name_age", name:age, sep = "-")
head(student_data2_merge)
## student_id name_age midterm1 midterm2 final
## 1 1 John-21 80 85 75
## 2 2 Alice-20 75 78 80
## 3 3 Bob-22 82 80 85
## 4 4 Emily-23 88 90 92
## 5 5 Michael-22 85 86 88
## 6 6 John-21 80 85 75