library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Roster of student names; its length sets the size of every sampled vector.
student_names <- c(
  "Alice", "Bob", "Charlie", "David", "Eva",
  "Frank", "Grace", "Hank", "Ivy", "Jack"
)
# One random age per student, drawn with replacement from 18-25.
# No seed is set, so the values differ on every run.
student_ages <- sample(18:25, size = length(student_names), replace = TRUE)
# One random test score per student, drawn with replacement from 60-100.
test_scores <- sample(60:100, size = length(student_names), replace = TRUE)
# Gender labels that are sampled when the data set is built.
genders <- c("Male", "Female")
# Create a data frame and convert it into tibble to store the data
# Assemble the per-student columns into a tibble. `as_tibble()` replaces
# `as.tibble()`, which was deprecated in tibble 2.0.0 and emits a warning.
data <- as_tibble(data.frame(
  Name = student_names,
  # Gender is sampled at random with replacement and stored as a factor.
  Gender = as.factor(sample(genders, length(student_names), replace = TRUE)),
  Age = student_ages,
  Test_Score = test_scores
))
# Display the dataset
# Explicit print; equivalent to auto-printing the object at top level.
print(data)
## # A tibble: 10 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Alice Female 18 65
## 2 Bob Male 22 78
## 3 Charlie Female 24 71
## 4 David Female 18 86
## 5 Eva Female 24 91
## 6 Frank Male 22 88
## 7 Grace Female 24 77
## 8 Hank Male 23 80
## 9 Ivy Male 19 68
## 10 Jack Male 21 71
1. Display the first 5 rows of the dataset
# `head()` returns 6 rows by default; request 5 explicitly to match the task.
head(data, 5)
## # A tibble: 5 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Alice Female 18 65
## 2 Bob Male 22 78
## 3 Charlie Female 24 71
## 4 David Female 18 86
## 5 Eva Female 24 91
2. Display basic statistics of the numeric columns
# Per-column summary: counts for character/factor columns, quantiles and
# mean for the numeric ones.
data %>% summary()
## Name Gender Age Test_Score
## Length:10 Female:5 Min. :18.00 Min. :65.0
## Class :character Male :5 1st Qu.:19.50 1st Qu.:71.0
## Mode :character Median :22.00 Median :77.5
## Mean :21.50 Mean :77.5
## 3rd Qu.:23.75 3rd Qu.:84.5
## Max. :24.00 Max. :91.0
3. Count the number of rows and columns
# Row count followed by column count.
c(nrow(data), ncol(data))
## [1] 10 4
4. Display the unique ages in the dataset
# Distinct ages, in order of first appearance.
data %>%
  pull(Age) %>%
  unique()
## [1] 18 22 24 23 19 21
5. Calculate the average test score
# Arithmetic mean of all test scores.
data %>%
  pull(Test_Score) %>%
  mean()
## [1] 77.5
6. Find the student with the highest test score
# Keep every row whose score equals the overall maximum (ties included).
filter(data, Test_Score == max(Test_Score))
## # A tibble: 1 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Eva Female 24 91
7. Filter students who are older than 21
# Students strictly older than 21.
filter(data, Age > 21)
## # A tibble: 6 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Bob Male 22 78
## 2 Charlie Female 24 71
## 3 Eva Female 24 91
## 4 Frank Male 22 88
## 5 Grace Female 24 77
## 6 Hank Male 23 80
8. Group students by age and calculate the average test score for
each age group
# Mean test score within each distinct age.
data %>%
  group_by(Age) %>%
  summarise(Avg_Test_Score = mean(Test_Score))
## # A tibble: 6 × 2
## Age Avg_Test_Score
## <int> <dbl>
## 1 18 75.5
## 2 19 68
## 3 21 71
## 4 22 83
## 5 23 80
## 6 24 79.7
9. Sort the dataset by test score in descending order
# All rows ordered from highest to lowest test score.
arrange(data, desc(Test_Score))
## # A tibble: 10 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Eva Female 24 91
## 2 Frank Male 22 88
## 3 David Female 18 86
## 4 Hank Male 23 80
## 5 Bob Male 22 78
## 6 Grace Female 24 77
## 7 Charlie Female 24 71
## 8 Jack Male 21 71
## 9 Ivy Male 19 68
## 10 Alice Female 18 65
10. Count the number of students in each age group
# Number of students at each distinct age.
count(data, Age)
## # A tibble: 6 × 2
## Age n
## <int> <int>
## 1 18 2
## 2 19 1
## 3 21 1
## 4 22 2
## 5 23 1
## 6 24 3
11. Calculate the standard deviation of test scores
# Sample standard deviation, written out as the square root of the variance.
sqrt(var(data$Test_Score))
## [1] 8.834906
12. Find the student(s) with the lowest test score
# Keep every row whose score equals the overall minimum (ties included).
filter(data, Test_Score == min(Test_Score))
## # A tibble: 1 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Alice Female 18 65
13. Filter students with test scores above 90
# Students scoring strictly above 90.
data %>%
  filter(Test_Score > 90)
## # A tibble: 1 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Eva Female 24 91
15. Calculate the correlation between age and test scores
# Correlation between age and test score (cor() defaults to Pearson).
with(data, cor(Age, Test_Score))
## [1] 0.3462732
16. Display the last 5 rows of the dataset
# `tail()` returns 6 rows by default; request 5 explicitly to match the task.
tail(data, 5)
## # A tibble: 5 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Frank Male 22 88
## 2 Grace Female 24 77
## 3 Hank Male 23 80
## 4 Ivy Male 19 68
## 5 Jack Male 21 71
17. Filter students who scored below 70
# Students scoring strictly below 70.
filter(data, Test_Score < 70)
## # A tibble: 2 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Alice Female 18 65
## 2 Ivy Male 19 68
18. Group students by test score range and count the number of
students in each range
# Bin scores into right-closed 10-point intervals and count rows per bin.
data %>%
  count(
    Test_Score_Range = cut(Test_Score, breaks = seq(0, 100, by = 10)),
    name = "Count"
  )
## # A tibble: 4 × 2
## Test_Score_Range Count
## <fct> <int>
## 1 (60,70] 2
## 2 (70,80] 5
## 3 (80,90] 2
## 4 (90,100] 1
19. Calculate the total test score of all students
# Total of all test scores.
data %>%
  pull(Test_Score) %>%
  sum()
## [1] 775
20. Calculate the percentage of students who passed (scored 70 or
above)
# Students at or above the pass mark of 70.
passed_students <- data %>% filter(Test_Score >= 70)
# Their share of all students, in percent.
percentage_passed <- nrow(passed_students) / nrow(data) * 100
# Print the result; the assignment alone displays nothing.
percentage_passed
## [1] 80
21. Find the student with the longest name
# Order by name length, longest first, and keep the first row.
data %>%
  arrange(desc(nchar(Name))) %>%
  slice_head(n = 1)
## # A tibble: 1 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Charlie Female 24 71
22. Calculate the range of test scores (max - min)
# Spread of the test scores: highest minus lowest.
max(data$Test_Score) - min(data$Test_Score)
## [1] 26
23. Find the student with the highest age
# Every student whose age equals the maximum age (ties included).
data %>%
  filter(Age == max(Age))
## # A tibble: 3 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Charlie Female 24 71
## 2 Eva Female 24 91
## 3 Grace Female 24 77
24. Calculate the average age of male and female students (assuming
a gender column)
# Mean age per gender, ignoring missing ages.
data %>%
  group_by(Gender) %>%
  summarise(Avg_Age = mean(Age, na.rm = TRUE))
## # A tibble: 2 × 2
## Gender Avg_Age
## <fct> <dbl>
## 1 Female 21.6
## 2 Male 21.4
25. Calculate the total number of male and female students (assuming
a gender column)
# Number of students per gender.
data %>%
  group_by(Gender) %>%
  tally()
## # A tibble: 2 × 2
## Gender n
## <fct> <int>
## 1 Female 5
## 2 Male 5
28. Filter students who are 20 years old and scored above 80
# Students who are exactly 20 and scored above 80.
data %>%
  filter(Age == 20 & Test_Score > 80)
## # A tibble: 0 × 4
## # ℹ 4 variables: Name <chr>, Gender <fct>, Age <int>, Test_Score <int>
29. Find the student with the highest test score for each age
group
# Within each age, keep the first row holding that age's maximum score.
data %>%
  group_by(Age) %>%
  slice(which.max(Test_Score))
## # A tibble: 6 × 4
## # Groups: Age [6]
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 David Female 18 86
## 2 Ivy Male 19 68
## 3 Jack Male 21 71
## 4 Frank Male 22 88
## 5 Hank Male 23 80
## 6 Eva Female 24 91
30. Calculate the correlation between age and test scores
# Correlation between the Age and Test_Score columns.
cor(data[["Age"]], data[["Test_Score"]])
## [1] 0.3462732
31. Calculate the number of students in each age group, grouped in
5-year intervals
# `cut()` builds right-closed intervals, so the lowest break (18) is excluded
# and 18-year-olds fall into NA; `include.lowest = TRUE` closes the first bin.
data %>%
  group_by(
    Age_Group = cut(Age, breaks = seq(18, 30, by = 5), include.lowest = TRUE)
  ) %>%
  summarize(Count = n())
## # A tibble: 2 × 2
## Age_Group Count
## <fct> <int>
## 1 [18,23] 7
## 2 (23,28] 3
32. Calculate the average age of students with test scores above
90
# Mean age of the students scoring strictly above 90.
data %>%
  filter(Test_Score > 90) %>%
  summarise(Avg_Age = mean(Age))
## # A tibble: 1 × 1
## Avg_Age
## <dbl>
## 1 24
33. Find the student with the lowest test score in the age group of
22
# Among 22-year-olds, sort ascending by score and keep the first row.
data %>%
  filter(Age == 22) %>%
  arrange(Test_Score) %>%
  slice_head(n = 1)
## # A tibble: 1 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Bob Male 22 78
34. Calculate the percentage of students who are 25 years old
# Rows with Age 25 as a percentage of all rows in `data`.
data %>%
  filter(Age == 25) %>%
  summarise(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
## Percentage
## <dbl>
## 1 0
35. Group students by age and gender and calculate the average test
score for each group
# Mean test score per (Age, Gender) pair. `.groups = "drop"` returns an
# ungrouped tibble and silences the "grouped output" message.
data %>%
  group_by(Age, Gender) %>%
  summarize(
    Avg_Test_Score = mean(Test_Score, na.rm = TRUE),
    .groups = "drop"
  )
## # A tibble: 6 × 3
## Age Gender Avg_Test_Score
## <int> <fct> <dbl>
## 1 18 Female 75.5
## 2 19 Male 68
## 3 21 Male 71
## 4 22 Male 83
## 5 23 Male 80
## 6 24 Female 79.7
38. Calculate the average age of students who scored below 70
# Mean age of the students scoring strictly below 70.
data %>%
  filter(Test_Score < 70) %>%
  summarise(Avg_Age = mean(Age))
## # A tibble: 1 × 1
## Avg_Age
## <dbl>
## 1 18.5
39. Calculate the total number of students for each unique age
# Number of students at each distinct age.
data %>%
  count(Age)
## # A tibble: 6 × 2
## Age n
## <int> <int>
## 1 18 2
## 2 19 1
## 3 21 1
## 4 22 2
## 5 23 1
## 6 24 3
40. Calculate the variance of test scores
# Sample variance of the test scores.
with(data, var(Test_Score))
## [1] 78.05556
41. Calculate the percentage of students who are 24 years old and
scored between 70 and 80
# Share (percent of all rows) of 24-year-olds scoring from 70 to 80 inclusive.
data %>%
  filter(Age == 24 & Test_Score >= 70 & Test_Score <= 80) %>%
  summarise(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
## Percentage
## <dbl>
## 1 20
42. Find the student with the highest test score among students aged
23
# Among 23-year-olds, sort descending by score and keep the first row.
data %>%
  filter(Age == 23) %>%
  arrange(desc(Test_Score)) %>%
  slice_head(n = 1)
## # A tibble: 1 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Hank Male 23 80
43. Calculate the percentage of male and female students (assuming a
gender column)
# Each gender's row count as a percentage of all rows in `data`.
data %>%
  group_by(Gender) %>%
  summarise(Percentage = n() / nrow(data) * 100)
## # A tibble: 2 × 2
## Gender Percentage
## <fct> <dbl>
## 1 Female 50
## 2 Male 50
44. Calculate the interquartile range (IQR) of test scores
# Interquartile range of the test scores.
with(data, IQR(Test_Score))
## [1] 13.5
45. Find the student with the lowest test score for each age
group
# Within each age, keep the rows holding that age's minimum score.
data %>%
  group_by(Age) %>%
  filter(Test_Score == min(Test_Score))
## # A tibble: 6 × 4
## # Groups: Age [6]
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Alice Female 18 65
## 2 Bob Male 22 78
## 3 Charlie Female 24 71
## 4 Hank Male 23 80
## 5 Ivy Male 19 68
## 6 Jack Male 21 71
46. Calculate the coefficient of variation (CV) for test scores
# Coefficient of variation: standard deviation as a percentage of the mean.
cv <- with(data, sd(Test_Score) / mean(Test_Score) * 100)
cv
## [1] 11.39988
47. Find the student with the highest age and test score
# Sort by age, then score, both descending; the first row is the oldest
# student with the best score at that age.
data %>%
  arrange(desc(Age), desc(Test_Score)) %>%
  slice_head(n = 1)
## # A tibble: 1 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Eva Female 24 91
48. Calculate the average test score for male and female students
(assuming a gender column)
# Mean test score per gender, ignoring missing scores.
data %>%
  group_by(Gender) %>%
  summarise(Avg_Test_Score = mean(Test_Score, na.rm = TRUE))
## # A tibble: 2 × 2
## Gender Avg_Test_Score
## <fct> <dbl>
## 1 Female 78
## 2 Male 77
49. Calculate the number of students who are 22 years old and scored
above 75
# Number of 22-year-olds scoring strictly above 75.
data %>%
  filter(Age == 22 & Test_Score > 75) %>%
  summarise(Count = n())
## # A tibble: 1 × 1
## Count
## <int>
## 1 2
50. Find the student with the highest test score among students aged
21 or younger
# Among students aged 21 or younger, keep the row with the highest score.
data %>%
  filter(Age <= 21) %>%
  arrange(desc(Test_Score)) %>%
  slice_head(n = 1)
## # A tibble: 1 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 David Female 18 86
51. Calculate the percentage of students who are 23 years old and
scored 60 or below
# Share (percent of all rows) of 23-year-olds scoring 60 or below.
data %>%
  filter(Age == 23 & Test_Score <= 60) %>%
  summarise(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
## Percentage
## <dbl>
## 1 0
52. Find the student with the highest age and the lowest test
score
# Sort by age descending, then score ascending; the first row is the oldest
# student with the lowest score at that age.
data %>%
  arrange(desc(Age), Test_Score) %>%
  slice_head(n = 1)
## # A tibble: 1 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Charlie Female 24 71
54. Calculate the average test score for each unique age and gender
combination
# Mean test score for every Age/Gender combination. `.groups = "drop"`
# returns an ungrouped tibble and silences the "grouped output" message.
data %>%
  group_by(Age, Gender) %>%
  summarize(
    Avg_Test_Score = mean(Test_Score, na.rm = TRUE),
    .groups = "drop"
  )
## # A tibble: 6 × 3
## Age Gender Avg_Test_Score
## <int> <fct> <dbl>
## 1 18 Female 75.5
## 2 19 Male 68
## 3 21 Male 71
## 4 22 Male 83
## 5 23 Male 80
## 6 24 Female 79.7
55. Calculate the range of ages (max - min)
# Spread of the ages: oldest minus youngest.
max(data$Age) - min(data$Age)
## [1] 6
56. Find the student with the lowest age
# Every student whose age equals the minimum age (ties included).
filter(data, Age == min(Age))
## # A tibble: 2 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Alice Female 18 65
## 2 David Female 18 86
57. Calculate the percentage of students who are 19 years old and
scored 70 or above
# Share (percent of all rows) of 19-year-olds scoring 70 or above.
data %>%
  filter(Age == 19 & Test_Score >= 70) %>%
  summarise(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
## Percentage
## <dbl>
## 1 0
58. Find the student with the lowest test score in the age group of
24
# Among 24-year-olds, sort ascending by score and keep the first row.
data %>%
  filter(Age == 24) %>%
  arrange(Test_Score) %>%
  slice_head(n = 1)
## # A tibble: 1 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Charlie Female 24 71
59. Calculate the standard error of the mean (SEM) for test
scores
# Standard error of the mean: sample standard deviation divided by sqrt(n).
#
# x      Numeric vector of observations.
# na.rm  If TRUE, missing values are dropped before computing. The default
#        (FALSE) keeps the original behaviour, where any NA yields NA.
# Returns a single numeric value.
stderror <- function(x, na.rm = FALSE) {
  if (na.rm) {
    # Drop NAs up front so that length(x) counts only the values used by sd().
    x <- x[!is.na(x)]
  }
  sd(x) / sqrt(length(x))
}
# Standard error of the mean for the test scores.
data %>%
  pull(Test_Score) %>%
  stderror()
## [1] 2.793842
60. Calculate the percentage of students who are 20 years old and
scored 75 or above
# Share (percent of all rows) of 20-year-olds scoring 75 or above.
data %>%
  filter(Age == 20 & Test_Score >= 75) %>%
  summarise(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
## Percentage
## <dbl>
## 1 0
61. Calculate the percentage of students who are 21 years old and
scored below 60
# Share (percent of all rows) of 21-year-olds scoring below 60.
data %>%
  filter(Age == 21 & Test_Score < 60) %>%
  summarise(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
## Percentage
## <dbl>
## 1 0
62. Find the student with the highest age in the age group of
24
# Restrict to 24-year-olds, sort by age descending, keep the first row.
# Every remaining row has the same age, so the sort leaves the order as is.
data %>%
  filter(Age == 24) %>%
  arrange(desc(Age)) %>%
  slice_head(n = 1)
## # A tibble: 1 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Charlie Female 24 71
63. Calculate the average test score for students who are 22 years
old or older
# Mean test score of students aged 22 or older, ignoring missing scores.
data %>%
  filter(Age >= 22) %>%
  summarise(Avg_Test_Score = mean(Test_Score, na.rm = TRUE))
## # A tibble: 1 × 1
## Avg_Test_Score
## <dbl>
## 1 80.8
64. Find the student with the highest test score in the age group of
20
# Among 20-year-olds, keep the row with the highest score (none if no match).
data %>%
  filter(Age == 20) %>%
  arrange(desc(Test_Score)) %>%
  slice_head(n = 1)
## # A tibble: 0 × 4
## # ℹ 4 variables: Name <chr>, Gender <fct>, Age <int>, Test_Score <int>
65. Calculate the percentage of students who are 18 years old and
scored 80 or above
# Share (percent of all rows) of 18-year-olds scoring 80 or above.
data %>%
  filter(Age == 18 & Test_Score >= 80) %>%
  summarise(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
## Percentage
## <dbl>
## 1 10
66. Find the student with the lowest test score for each gender
# Within each gender, keep the rows holding that gender's minimum score.
data %>%
  group_by(Gender) %>%
  filter(Test_Score == min(Test_Score))
## # A tibble: 2 × 4
## # Groups: Gender [2]
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Alice Female 18 65
## 2 Ivy Male 19 68
67. Calculate the number of students who are 21 years old and scored
65 or above
# Number of 21-year-olds scoring 65 or above.
data %>%
  filter(Age == 21 & Test_Score >= 65) %>%
  summarise(Count = n())
## # A tibble: 1 × 1
## Count
## <int>
## 1 1
68. Find the student with the highest age and the lowest test score
in the age group of 22
# Among 22-year-olds, sort by age descending then score ascending and keep
# the first row.
data %>%
  filter(Age == 22) %>%
  arrange(desc(Age), Test_Score) %>%
  slice_head(n = 1)
## # A tibble: 1 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Bob Male 22 78
70. Find the student with the highest age and the highest test
score
# Order by age and then score, both descending, and keep the first row.
data %>%
  arrange(desc(Age), desc(Test_Score)) %>%
  slice_head(n = 1)
## # A tibble: 1 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Eva Female 24 91
71. Calculate the range of test scores for male and female students
(assuming a gender column)
# Spread of test scores (highest minus lowest) within each gender.
data %>%
  group_by(Gender) %>%
  summarise(Test_Score_Range = diff(range(Test_Score)))
## # A tibble: 2 × 2
## Gender Test_Score_Range
## <fct> <int>
## 1 Female 26
## 2 Male 20
72. Find the student with the highest test score for each
gender
# Within each gender, keep the rows holding that gender's maximum score.
data %>%
  group_by(Gender) %>%
  filter(Test_Score == max(Test_Score))
## # A tibble: 2 × 4
## # Groups: Gender [2]
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Eva Female 24 91
## 2 Frank Male 22 88
73. Calculate the percentage of students who are 25 years old and
scored below 75
# Share (percent of all rows) of 25-year-olds scoring below 75.
data %>%
  filter(Age == 25 & Test_Score < 75) %>%
  summarise(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
## Percentage
## <dbl>
## 1 0
74. Find the student with the highest test score in the age group of
23
# Highest-scoring row among the 23-year-olds.
data %>%
  filter(Age == 23) %>%
  arrange(desc(Test_Score)) %>%
  slice_head(n = 1)
## # A tibble: 1 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Hank Male 23 80
75. Calculate the average age of students who scored 75 or
above
# Mean age of students scoring 75 or above, ignoring missing ages.
data %>%
  filter(Test_Score >= 75) %>%
  summarise(Avg_Age = mean(Age, na.rm = TRUE))
## # A tibble: 1 × 1
## Avg_Age
## <dbl>
## 1 22.2
76. Calculate the percentage of students who are 19 years old and
scored below 70
# Share (percent of all rows) of 19-year-olds scoring below 70.
data %>%
  filter(Age == 19 & Test_Score < 70) %>%
  summarise(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
## Percentage
## <dbl>
## 1 10
77. Find the student with the highest test score for each unique age
and gender combination
# Within each Age/Gender pair, keep the rows holding that pair's top score.
data %>%
  group_by(Age, Gender) %>%
  filter(Test_Score == max(Test_Score))
## # A tibble: 6 × 4
## # Groups: Age, Gender [6]
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 David Female 18 86
## 2 Eva Female 24 91
## 3 Frank Male 22 88
## 4 Hank Male 23 80
## 5 Ivy Male 19 68
## 6 Jack Male 21 71
78. Calculate the average age of students who scored 80 or
above
# Mean age of students scoring 80 or above, ignoring missing ages.
data %>%
  filter(Test_Score >= 80) %>%
  summarise(Avg_Age = mean(Age, na.rm = TRUE))
## # A tibble: 1 × 1
## Avg_Age
## <dbl>
## 1 21.8
79. Find the student with the highest age in the age group of
23
# Restrict to 23-year-olds, sort by age descending, keep the first row.
data %>%
  filter(Age == 23) %>%
  arrange(desc(Age)) %>%
  slice_head(n = 1)
## # A tibble: 1 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Hank Male 23 80
80. Calculate the percentage of students who are 22 years old and
scored 60 or below
# Share (percent of all rows) of 22-year-olds scoring 60 or below.
data %>%
  filter(Age == 22 & Test_Score <= 60) %>%
  summarise(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
## Percentage
## <dbl>
## 1 0
81. Find the student with the lowest test score in the age group of
25
# Lowest-scoring row among the 25-year-olds (no rows if nobody is 25).
data %>%
  filter(Age == 25) %>%
  arrange(Test_Score) %>%
  slice_head(n = 1)
## # A tibble: 0 × 4
## # ℹ 4 variables: Name <chr>, Gender <fct>, Age <int>, Test_Score <int>
82. Calculate the average test score for students who are 24 years
old or older
# Mean test score of students aged 24 or older, ignoring missing scores.
data %>%
  filter(Age >= 24) %>%
  summarise(Avg_Test_Score = mean(Test_Score, na.rm = TRUE))
## # A tibble: 1 × 1
## Avg_Test_Score
## <dbl>
## 1 79.7
83. Find the student with the highest age and the lowest test score
in the age group of 20
# Among 20-year-olds, sort by age descending then score ascending and keep
# the first row (no rows if nobody is 20).
data %>%
  filter(Age == 20) %>%
  arrange(desc(Age), Test_Score) %>%
  slice_head(n = 1)
## # A tibble: 0 × 4
## # ℹ 4 variables: Name <chr>, Gender <fct>, Age <int>, Test_Score <int>
85. Find the student with the highest test score in the age group of
22
# Highest-scoring row among the 22-year-olds.
data %>%
  filter(Age == 22) %>%
  arrange(desc(Test_Score)) %>%
  slice_head(n = 1)
## # A tibble: 1 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Frank Male 22 88
86. Calculate the percentage of students who are 23 years old and
scored 70 or above
# Share (percent of all rows) of 23-year-olds scoring 70 or above.
data %>%
  filter(Age == 23 & Test_Score >= 70) %>%
  summarise(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
## Percentage
## <dbl>
## 1 10
87. Find the student with the lowest test score in the age group of
24
# Lowest-scoring row among the 24-year-olds.
data %>%
  filter(Age == 24) %>%
  arrange(Test_Score) %>%
  slice_head(n = 1)
## # A tibble: 1 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Charlie Female 24 71
88. Calculate the average age of students who scored 90 or
above
# Mean age of students scoring 90 or above, ignoring missing ages.
data %>%
  filter(Test_Score >= 90) %>%
  summarise(Avg_Age = mean(Age, na.rm = TRUE))
## # A tibble: 1 × 1
## Avg_Age
## <dbl>
## 1 24
89. Find the student with the highest test score for each unique age
and gender combination
# Rows carrying the maximum score of their Age/Gender group.
data %>%
  group_by(Age, Gender) %>%
  filter(Test_Score == max(Test_Score))
## # A tibble: 6 × 4
## # Groups: Age, Gender [6]
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 David Female 18 86
## 2 Eva Female 24 91
## 3 Frank Male 22 88
## 4 Hank Male 23 80
## 5 Ivy Male 19 68
## 6 Jack Male 21 71
90. Calculate the range of ages for male and female students
(assuming a gender column)
# Spread of ages (oldest minus youngest) within each gender.
data %>%
  group_by(Gender) %>%
  summarise(Age_Range = diff(range(Age)))
## # A tibble: 2 × 2
## Gender Age_Range
## <fct> <int>
## 1 Female 6
## 2 Male 4
91. Find the student with the lowest test score for each age
group
# Rows carrying the minimum score of their age group.
data %>%
  group_by(Age) %>%
  filter(Test_Score == min(Test_Score))
## # A tibble: 6 × 4
## # Groups: Age [6]
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Alice Female 18 65
## 2 Bob Male 22 78
## 3 Charlie Female 24 71
## 4 Hank Male 23 80
## 5 Ivy Male 19 68
## 6 Jack Male 21 71
92. Calculate the percentage of students who are 24 years old and
scored 80 or above
# Share (percent of all rows) of 24-year-olds scoring 80 or above.
data %>%
  filter(Age == 24 & Test_Score >= 80) %>%
  summarise(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
## Percentage
## <dbl>
## 1 10
93. Find the student with the highest test score in the age group of
21
# Highest-scoring row among the 21-year-olds.
data %>%
  filter(Age == 21) %>%
  arrange(desc(Test_Score)) %>%
  slice_head(n = 1)
## # A tibble: 1 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Jack Male 21 71
94. Calculate the average test score for students who are 25 years
old or older
# Mean test score of students aged 25 or older; with no matching rows the
# mean of an empty vector is NaN.
data %>%
  filter(Age >= 25) %>%
  summarise(Avg_Test_Score = mean(Test_Score, na.rm = TRUE))
## # A tibble: 1 × 1
## Avg_Test_Score
## <dbl>
## 1 NaN
95. Find the student with the lowest age in the age group of 23
# Restrict to 23-year-olds, sort by age ascending, keep the first row.
data %>%
  filter(Age == 23) %>%
  arrange(Age) %>%
  slice_head(n = 1)
## # A tibble: 1 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Hank Male 23 80
97. Find the student with the lowest age and the lowest test
score
# Sort by age, then score, both ascending; the first row is the youngest
# student with the lowest score at that age.
data %>%
  arrange(Age, Test_Score) %>%
  slice_head(n = 1)
## # A tibble: 1 × 4
## Name Gender Age Test_Score
## <chr> <fct> <int> <int>
## 1 Alice Female 18 65
98. Calculate the percentage of students who are 20 years old and
scored 70 or above
# Share (percent of all rows) of 20-year-olds scoring 70 or above.
data %>%
  filter(Age == 20 & Test_Score >= 70) %>%
  summarise(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
## Percentage
## <dbl>
## 1 0
99. Find the student with the highest test score in the age group of
25
# Highest-scoring row among the 25-year-olds (no rows if nobody is 25).
data %>%
  filter(Age == 25) %>%
  arrange(desc(Test_Score)) %>%
  slice_head(n = 1)
## # A tibble: 0 × 4
## # ℹ 4 variables: Name <chr>, Gender <fct>, Age <int>, Test_Score <int>
100. Calculate the average age of students who scored 60 or
below
# Mean age of students scoring 60 or below; with no matching rows the mean
# of an empty vector is NaN.
data %>%
  filter(Test_Score <= 60) %>%
  summarise(Avg_Age = mean(Age, na.rm = TRUE))
## # A tibble: 1 × 1
## Avg_Age
## <dbl>
## 1 NaN