students <- data.frame(
Student = c("Alice", "Bob", "Carol", "David"),
Math_2019 = c(85, 75, 92, 68),
Science_2019 = c(90, 80, 88, 70),
English_2019 = c(78, 70, 95, 65),
Math_2020 = c(88, 78, 94, 70),
Science_2020 = c(92, 83, 90, 72),
English_2020 = c(81, 72, 97, 68)
)
write.csv(students, "students_scores.csv", row.names = FALSE)
students_grades <- read.csv("students_scores.csv",header=TRUE)
students_grades
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyr)
students_grades_tidy<- students_grades %>%
pivot_longer(
cols = -Student, # all columns except Student
names_to = c("Subject", "Year"), # split names into two new columns
names_sep = "_", # separate by underscore
values_to = "Score" # name for the value column
)
# View the tidy data
print(students_grades_tidy)
## # A tibble: 24 × 4
## Student Subject Year Score
## <chr> <chr> <chr> <int>
## 1 Alice Math 2019 85
## 2 Alice Science 2019 90
## 3 Alice English 2019 78
## 4 Alice Math 2020 88
## 5 Alice Science 2020 92
## 6 Alice English 2020 81
## 7 Bob Math 2019 75
## 8 Bob Science 2019 80
## 9 Bob English 2019 70
## 10 Bob Math 2020 78
## # ℹ 14 more rows
library(ggplot2)
# Make sure Year is treated as a factor (for discrete x-axis)
students_grades_tidy$Year <- as.factor(students_grades_tidy$Year)
# Faceted line plot
ggplot(students_grades_tidy, aes(x = Year, y = Score, group = Subject, color = Subject)) +
geom_line(size = 1.2) +
geom_point(size = 2) +
facet_wrap(~ Student) +
labs(
title = "Student Performance Trend by Subject (2019–2020)",
x = "Year",
y = "Score"
) +
theme_minimal(base_size = 14) +
theme(
legend.position = "bottom",
strip.text = element_text(face = "bold")
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Based on these 4 graphs, We can confidently conclude that all four students scores have increased from 2019 to 2022.
*Step: For this analysis, we will have to reshape the dataset once again from long to wide where will have two columns that show students scores by year.**
# Pivot wider to have one column per year
students_grades_wide <- students_grades_tidy %>%
pivot_wider(names_from = Year, values_from = Score, names_prefix = "Year_")
print(students_grades_wide)
## # A tibble: 12 × 4
## Student Subject Year_2019 Year_2020
## <chr> <chr> <int> <int>
## 1 Alice Math 85 88
## 2 Alice Science 90 92
## 3 Alice English 78 81
## 4 Bob Math 75 78
## 5 Bob Science 80 83
## 6 Bob English 70 72
## 7 Carol Math 92 94
## 8 Carol Science 88 90
## 9 Carol English 95 97
## 10 David Math 68 70
## 11 David Science 70 72
## 12 David English 65 68
improvement <- students_grades_wide %>%
mutate(Improvement = Year_2020 - Year_2019)
print(improvement)
## # A tibble: 12 × 5
## Student Subject Year_2019 Year_2020 Improvement
## <chr> <chr> <int> <int> <int>
## 1 Alice Math 85 88 3
## 2 Alice Science 90 92 2
## 3 Alice English 78 81 3
## 4 Bob Math 75 78 3
## 5 Bob Science 80 83 3
## 6 Bob English 70 72 2
## 7 Carol Math 92 94 2
## 8 Carol Science 88 90 2
## 9 Carol English 95 97 2
## 10 David Math 68 70 2
## 11 David Science 70 72 2
## 12 David English 65 68 3
most_improve_student <- improvement %>%
group_by(Subject) %>%
filter(Improvement == max(Improvement)) %>%
select(Subject, Student, Improvement)
print(most_improve_student)
## # A tibble: 5 × 3
## # Groups: Subject [3]
## Subject Student Improvement
## <chr> <chr> <int>
## 1 Math Alice 3
## 2 English Alice 3
## 3 Math Bob 3
## 4 Science Bob 3
## 5 English David 3
We can conclude that Alice and Bob are the most improved students in Math. Alice and David are the most improved students in English and Bob is the only student with the most improvement in science.