library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Create a list of student names
student_names <- c("Alice", "Bob", "Charlie", "David", "Eva", "Frank", "Grace", "Hank", "Ivy", "Jack")

# Generate random ages for the students
student_ages <- sample(18:25, length(student_names), replace = TRUE)

# Generate random test scores for each student
test_scores <- sample(60:100, length(student_names), replace = TRUE)

# Define the genders
genders <- c("Male", "Female")

# Build the dataset directly as a tibble (as.tibble() has been deprecated
# since tibble 2.0.0). The sample() calls run in the same order as before.
# NOTE: no seed is set, so the sampled values change on every run; the output
# shown in this document comes from one particular run.
data <- tibble(
  Name = student_names,
  Gender = as.factor(sample(genders, length(student_names), replace = TRUE)),
  Age = student_ages,
  Test_Score = test_scores
)
# Display the dataset
data
## # A tibble: 10 × 4
##    Name    Gender   Age Test_Score
##    <chr>   <fct>  <int>      <int>
##  1 Alice   Female    18         65
##  2 Bob     Male      22         78
##  3 Charlie Female    24         71
##  4 David   Female    18         86
##  5 Eva     Female    24         91
##  6 Frank   Male      22         88
##  7 Grace   Female    24         77
##  8 Hank    Male      23         80
##  9 Ivy     Male      19         68
## 10 Jack    Male      21         71

1. Display the first 5 rows of the dataset

# head() returns 6 rows by default, so request 5 explicitly.
head(data, 5)
## # A tibble: 5 × 4
##   Name    Gender   Age Test_Score
##   <chr>   <fct>  <int>      <int>
## 1 Alice   Female    18         65
## 2 Bob     Male      22         78
## 3 Charlie Female    24         71
## 4 David   Female    18         86
## 5 Eva     Female    24         91

2. Display basic statistics of the numeric columns

summary(data)
##      Name              Gender       Age          Test_Score  
##  Length:10          Female:5   Min.   :18.00   Min.   :65.0  
##  Class :character   Male  :5   1st Qu.:19.50   1st Qu.:71.0  
##  Mode  :character              Median :22.00   Median :77.5  
##                                Mean   :21.50   Mean   :77.5  
##                                3rd Qu.:23.75   3rd Qu.:84.5  
##                                Max.   :24.00   Max.   :91.0

3. Count the number of rows and columns

dim(data)
## [1] 10  4

4. Display the unique ages in the dataset

unique(data$Age)
## [1] 18 22 24 23 19 21

5. Calculate the average test score

mean(data$Test_Score)
## [1] 77.5

6. Find the student with the highest test score

data %>% filter(Test_Score == max(Test_Score))
## # A tibble: 1 × 4
##   Name  Gender   Age Test_Score
##   <chr> <fct>  <int>      <int>
## 1 Eva   Female    24         91

7. Filter students who are older than 21

data %>% filter(Age > 21)
## # A tibble: 6 × 4
##   Name    Gender   Age Test_Score
##   <chr>   <fct>  <int>      <int>
## 1 Bob     Male      22         78
## 2 Charlie Female    24         71
## 3 Eva     Female    24         91
## 4 Frank   Male      22         88
## 5 Grace   Female    24         77
## 6 Hank    Male      23         80

8. Group students by age and calculate the average test score for each age group

data %>% group_by(Age) %>% summarize(Avg_Test_Score = mean(Test_Score))
## # A tibble: 6 × 2
##     Age Avg_Test_Score
##   <int>          <dbl>
## 1    18           75.5
## 2    19           68  
## 3    21           71  
## 4    22           83  
## 5    23           80  
## 6    24           79.7

9. Sort the dataset by test score in descending order

data %>% arrange(desc(Test_Score))
## # A tibble: 10 × 4
##    Name    Gender   Age Test_Score
##    <chr>   <fct>  <int>      <int>
##  1 Eva     Female    24         91
##  2 Frank   Male      22         88
##  3 David   Female    18         86
##  4 Hank    Male      23         80
##  5 Bob     Male      22         78
##  6 Grace   Female    24         77
##  7 Charlie Female    24         71
##  8 Jack    Male      21         71
##  9 Ivy     Male      19         68
## 10 Alice   Female    18         65

10. Count the number of students in each age group

data %>% count(Age)
## # A tibble: 6 × 2
##     Age     n
##   <int> <int>
## 1    18     2
## 2    19     1
## 3    21     1
## 4    22     2
## 5    23     1
## 6    24     3

11. Calculate the standard deviation of test scores

sd(data$Test_Score)
## [1] 8.834906

12. Find the student(s) with the lowest test score

data %>% filter(Test_Score == min(Test_Score))
## # A tibble: 1 × 4
##   Name  Gender   Age Test_Score
##   <chr> <fct>  <int>      <int>
## 1 Alice Female    18         65

13. Filter students with test scores above 90

data %>% filter(Test_Score > 90)
## # A tibble: 1 × 4
##   Name  Gender   Age Test_Score
##   <chr> <fct>  <int>      <int>
## 1 Eva   Female    24         91

14. Calculate the median age of the students

median(data$Age)
## [1] 22

15. Calculate the correlation between age and test scores

cor(data$Age, data$Test_Score)
## [1] 0.3462732

16. Display the last 5 rows of the dataset

# tail() returns 6 rows by default, so request 5 explicitly.
tail(data, 5)
## # A tibble: 5 × 4
##   Name  Gender   Age Test_Score
##   <chr> <fct>  <int>      <int>
## 1 Frank Male      22         88
## 2 Grace Female    24         77
## 3 Hank  Male      23         80
## 4 Ivy   Male      19         68
## 5 Jack  Male      21         71

17. Filter students who scored below 70

data %>% filter(Test_Score < 70)
## # A tibble: 2 × 4
##   Name  Gender   Age Test_Score
##   <chr> <fct>  <int>      <int>
## 1 Alice Female    18         65
## 2 Ivy   Male      19         68

18. Group students by test score range and count the number of students in each range

data %>% group_by(Test_Score_Range = cut(Test_Score, breaks = seq(0, 100, by = 10))) %>% summarize(Count = n())
## # A tibble: 4 × 2
##   Test_Score_Range Count
##   <fct>            <int>
## 1 (60,70]              2
## 2 (70,80]              5
## 3 (80,90]              2
## 4 (90,100]             1

19. Calculate the total test score of all students

sum(data$Test_Score)
## [1] 775

20. Calculate the percentage of students who passed (scored 70 or above)

# Students at or above the pass mark (70), as a share of all students.
passed_students <- data %>% filter(Test_Score >= 70)
percentage_passed <- nrow(passed_students) / nrow(data) * 100
# Print the result; the assignment alone displays nothing.
percentage_passed
## [1] 80

21. Find the student with the longest name

data %>% arrange(desc(nchar(Name))) %>% head(1)
## # A tibble: 1 × 4
##   Name    Gender   Age Test_Score
##   <chr>   <fct>  <int>      <int>
## 1 Charlie Female    24         71

22. Calculate the range of test scores (max - min)

diff(range(data$Test_Score))
## [1] 26

23. Find the student(s) with the highest age

data %>% filter(Age == max(Age))
## # A tibble: 3 × 4
##   Name    Gender   Age Test_Score
##   <chr>   <fct>  <int>      <int>
## 1 Charlie Female    24         71
## 2 Eva     Female    24         91
## 3 Grace   Female    24         77

24. Calculate the average age of male and female students (assuming a gender column)

data %>% group_by(Gender) %>% summarize(Avg_Age = mean(Age, na.rm = TRUE))
## # A tibble: 2 × 2
##   Gender Avg_Age
##   <fct>    <dbl>
## 1 Female    21.6
## 2 Male      21.4

25. Calculate the total number of male and female students (assuming a gender column)

data %>% count(Gender)
## # A tibble: 2 × 2
##   Gender     n
##   <fct>  <int>
## 1 Female     5
## 2 Male       5

26. Find the student with the closest age to the median age

median_age <- median(data$Age)
data %>% mutate(Age_Difference = abs(Age - median_age)) %>% arrange(Age_Difference) %>% head(1)
## # A tibble: 1 × 5
##   Name  Gender   Age Test_Score Age_Difference
##   <chr> <fct>  <int>      <int>          <dbl>
## 1 Bob   Male      22         78              0

27. Calculate the median test score

median(data$Test_Score)
## [1] 77.5

28. Filter students who are 20 years old and scored above 80

data %>% filter(Age == 20, Test_Score > 80)
## # A tibble: 0 × 4
## # ℹ 4 variables: Name <chr>, Gender <fct>, Age <int>, Test_Score <int>

29. Find the student with the highest test score for each age group

data %>% group_by(Age) %>% slice(which.max(Test_Score))
## # A tibble: 6 × 4
## # Groups:   Age [6]
##   Name  Gender   Age Test_Score
##   <chr> <fct>  <int>      <int>
## 1 David Female    18         86
## 2 Ivy   Male      19         68
## 3 Jack  Male      21         71
## 4 Frank Male      22         88
## 5 Hank  Male      23         80
## 6 Eva   Female    24         91

30. Calculate the correlation between age and test scores

cor(data$Age, data$Test_Score)
## [1] 0.3462732

31. Calculate the number of students in each age group, grouped in 5-year intervals

# right = FALSE makes the bins left-closed ([18,23), [23,28)), so students
# aged exactly 18 fall into the first bin instead of becoming NA.
data %>%
  group_by(
    Age_Group = cut(Age, breaks = seq(18, 30, by = 5), right = FALSE)
  ) %>%
  summarize(Count = n())
## # A tibble: 2 × 2
##   Age_Group Count
##   <fct>     <int>
## 1 [18,23)       6
## 2 [23,28)       4

32. Calculate the average age of students with test scores above 90

data %>% filter(Test_Score > 90) %>% summarize(Avg_Age = mean(Age))
## # A tibble: 1 × 1
##   Avg_Age
##     <dbl>
## 1      24

33. Find the student with the lowest test score in the age group of 22

data %>% filter(Age == 22) %>% arrange(Test_Score) %>% head(1)
## # A tibble: 1 × 4
##   Name  Gender   Age Test_Score
##   <chr> <fct>  <int>      <int>
## 1 Bob   Male      22         78

34. Calculate the percentage of students who are 25 years old

data %>% filter(Age == 25) %>% summarize(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
##   Percentage
##        <dbl>
## 1          0

35. Group students by age and gender and calculate the average test score for each group

# Average score per Age/Gender pair. .groups = "drop" returns an ungrouped
# tibble and silences summarise()'s regrouping message.
data %>%
  group_by(Age, Gender) %>%
  summarize(Avg_Test_Score = mean(Test_Score, na.rm = TRUE), .groups = "drop")
## # A tibble: 6 × 3
##     Age Gender Avg_Test_Score
##   <int> <fct>           <dbl>
## 1    18 Female           75.5
## 2    19 Male             68  
## 3    21 Male             71  
## 4    22 Male             83  
## 5    23 Male             80  
## 6    24 Female           79.7

36. Calculate the median test score for students aged 21 or younger

data %>% filter(Age <= 21) %>% summarize(Median_Test_Score = median(Test_Score))
## # A tibble: 1 × 1
##   Median_Test_Score
##               <dbl>
## 1              69.5

37. Find the student with the closest test score to the median test score

median_test_score <- median(data$Test_Score)
data %>% mutate(Test_Score_Difference = abs(Test_Score - median_test_score)) %>% arrange(Test_Score_Difference) %>% head(1)
## # A tibble: 1 × 5
##   Name  Gender   Age Test_Score Test_Score_Difference
##   <chr> <fct>  <int>      <int>                 <dbl>
## 1 Bob   Male      22         78                   0.5

38. Calculate the average age of students who scored below 70

data %>% filter(Test_Score < 70) %>% summarize(Avg_Age = mean(Age))
## # A tibble: 1 × 1
##   Avg_Age
##     <dbl>
## 1    18.5

39. Calculate the total number of students for each unique age

data %>% group_by(Age) %>% tally()
## # A tibble: 6 × 2
##     Age     n
##   <int> <int>
## 1    18     2
## 2    19     1
## 3    21     1
## 4    22     2
## 5    23     1
## 6    24     3

40. Calculate the variance of test scores

var(data$Test_Score)
## [1] 78.05556

41. Calculate the percentage of students who are 24 years old and scored between 70 and 80

data %>% filter(Age == 24, Test_Score >= 70, Test_Score <= 80) %>% summarize(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
##   Percentage
##        <dbl>
## 1         20

42. Find the student with the highest test score among students aged 23

data %>% filter(Age == 23) %>% arrange(desc(Test_Score)) %>% head(1)
## # A tibble: 1 × 4
##   Name  Gender   Age Test_Score
##   <chr> <fct>  <int>      <int>
## 1 Hank  Male      23         80

43. Calculate the percentage of male and female students (assuming a gender column)

data %>% group_by(Gender) %>% summarize(Percentage = n() / nrow(data) * 100)
## # A tibble: 2 × 2
##   Gender Percentage
##   <fct>       <dbl>
## 1 Female         50
## 2 Male           50

44. Calculate the interquartile range (IQR) of test scores

IQR(data$Test_Score)
## [1] 13.5

45. Find the student with the lowest test score for each age group

data %>% group_by(Age) %>% filter(Test_Score == min(Test_Score))
## # A tibble: 6 × 4
## # Groups:   Age [6]
##   Name    Gender   Age Test_Score
##   <chr>   <fct>  <int>      <int>
## 1 Alice   Female    18         65
## 2 Bob     Male      22         78
## 3 Charlie Female    24         71
## 4 Hank    Male      23         80
## 5 Ivy     Male      19         68
## 6 Jack    Male      21         71

46. Calculate the coefficient of variation (CV) for test scores

cv <- sd(data$Test_Score) / mean(data$Test_Score) * 100
cv
## [1] 11.39988

47. Find the student with the highest age and test score

data %>% arrange(desc(Age), desc(Test_Score)) %>% head(1)
## # A tibble: 1 × 4
##   Name  Gender   Age Test_Score
##   <chr> <fct>  <int>      <int>
## 1 Eva   Female    24         91

48. Calculate the average test score for male and female students (assuming a gender column)

data %>% group_by(Gender) %>% summarize(Avg_Test_Score = mean(Test_Score, na.rm = TRUE))
## # A tibble: 2 × 2
##   Gender Avg_Test_Score
##   <fct>           <dbl>
## 1 Female             78
## 2 Male               77

49. Calculate the number of students who are 22 years old and scored above 75

data %>% filter(Age == 22, Test_Score > 75) %>% summarize(Count = n())
## # A tibble: 1 × 1
##   Count
##   <int>
## 1     2

50. Find the student with the highest test score among students aged 21 or younger

data %>% filter(Age <= 21) %>% arrange(desc(Test_Score)) %>% head(1)
## # A tibble: 1 × 4
##   Name  Gender   Age Test_Score
##   <chr> <fct>  <int>      <int>
## 1 David Female    18         86

51. Calculate the percentage of students who are 23 years old and scored 60 or below

data %>% filter(Age == 23, Test_Score <= 60) %>% summarize(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
##   Percentage
##        <dbl>
## 1          0

52. Find the student with the highest age and the lowest test score

data %>% arrange(desc(Age), Test_Score) %>% head(1)
## # A tibble: 1 × 4
##   Name    Gender   Age Test_Score
##   <chr>   <fct>  <int>      <int>
## 1 Charlie Female    24         71

53. Calculate the median age of students who scored above 80

data %>% filter(Test_Score > 80) %>% summarize(Median_Age = median(Age))
## # A tibble: 1 × 1
##   Median_Age
##        <int>
## 1         22

54. Calculate the average test score for each unique age and gender combination

# Average score for each Age/Gender combination. .groups = "drop" returns an
# ungrouped tibble and silences summarise()'s regrouping message.
data %>%
  group_by(Age, Gender) %>%
  summarize(Avg_Test_Score = mean(Test_Score, na.rm = TRUE), .groups = "drop")
## # A tibble: 6 × 3
##     Age Gender Avg_Test_Score
##   <int> <fct>           <dbl>
## 1    18 Female           75.5
## 2    19 Male             68  
## 3    21 Male             71  
## 4    22 Male             83  
## 5    23 Male             80  
## 6    24 Female           79.7

55. Calculate the range of ages (max - min)

diff(range(data$Age))
## [1] 6

56. Find the student(s) with the lowest age

data %>% filter(Age == min(Age))
## # A tibble: 2 × 4
##   Name  Gender   Age Test_Score
##   <chr> <fct>  <int>      <int>
## 1 Alice Female    18         65
## 2 David Female    18         86

57. Calculate the percentage of students who are 19 years old and scored 70 or above

data %>% filter(Age == 19, Test_Score >= 70) %>% summarize(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
##   Percentage
##        <dbl>
## 1          0

58. Find the student with the lowest test score in the age group of 24

data %>% filter(Age == 24) %>% arrange(Test_Score) %>% head(1)
## # A tibble: 1 × 4
##   Name    Gender   Age Test_Score
##   <chr>   <fct>  <int>      <int>
## 1 Charlie Female    24         71

59. Calculate the standard error of the mean (SEM) for test scores

#' Standard error of the mean.
#'
#' @param x A numeric vector.
#' @param na.rm If TRUE, missing values are dropped before computing. The
#'   default (FALSE) keeps the original behaviour, where any NA in `x`
#'   yields NA.
#' @return `sd(x)` divided by the square root of the number of values used.
stderror <- function(x, na.rm = FALSE) {
  if (na.rm) {
    # Drop NAs up front so both sd() and the count use the same values.
    x <- x[!is.na(x)]
  }
  sd(x) / sqrt(length(x))
}
stderror(data$Test_Score)
## [1] 2.793842

60. Calculate the percentage of students who are 20 years old and scored 75 or above

data %>% filter(Age == 20, Test_Score >= 75) %>% summarize(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
##   Percentage
##        <dbl>
## 1          0

61. Calculate the percentage of students who are 21 years old and scored below 60

data %>% filter(Age == 21, Test_Score < 60) %>% summarize(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
##   Percentage
##        <dbl>
## 1          0

62. Find the student with the highest age in the age group of 24

data %>% filter(Age == 24) %>% arrange(desc(Age)) %>% head(1)
## # A tibble: 1 × 4
##   Name    Gender   Age Test_Score
##   <chr>   <fct>  <int>      <int>
## 1 Charlie Female    24         71

63. Calculate the average test score for students who are 22 years old or older

data %>% filter(Age >= 22) %>% summarize(Avg_Test_Score = mean(Test_Score, na.rm = TRUE))
## # A tibble: 1 × 1
##   Avg_Test_Score
##            <dbl>
## 1           80.8

64. Find the student with the highest test score in the age group of 20

data %>% filter(Age == 20) %>% arrange(desc(Test_Score)) %>% head(1)
## # A tibble: 0 × 4
## # ℹ 4 variables: Name <chr>, Gender <fct>, Age <int>, Test_Score <int>

65. Calculate the percentage of students who are 18 years old and scored 80 or above

data %>% filter(Age == 18, Test_Score >= 80) %>% summarize(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
##   Percentage
##        <dbl>
## 1         10

66. Find the student with the lowest test score for each gender

data %>% group_by(Gender) %>% filter(Test_Score == min(Test_Score))
## # A tibble: 2 × 4
## # Groups:   Gender [2]
##   Name  Gender   Age Test_Score
##   <chr> <fct>  <int>      <int>
## 1 Alice Female    18         65
## 2 Ivy   Male      19         68

67. Calculate the number of students who are 21 years old and scored 65 or above

data %>% filter(Age == 21, Test_Score >= 65) %>% summarize(Count = n())
## # A tibble: 1 × 1
##   Count
##   <int>
## 1     1

68. Find the student with the highest age and the lowest test score in the age group of 22

data %>% filter(Age == 22) %>% arrange(desc(Age), Test_Score) %>% head(1)
## # A tibble: 1 × 4
##   Name  Gender   Age Test_Score
##   <chr> <fct>  <int>      <int>
## 1 Bob   Male      22         78

69. Calculate the median test score for male and female students (assuming a gender column)

data %>% group_by(Gender) %>% summarize(Median_Test_Score = median(Test_Score, na.rm = TRUE))
## # A tibble: 2 × 2
##   Gender Median_Test_Score
##   <fct>              <int>
## 1 Female                77
## 2 Male                  78

70. Find the student with the highest age and the highest test score

data %>% arrange(desc(Age), desc(Test_Score)) %>% head(1)
## # A tibble: 1 × 4
##   Name  Gender   Age Test_Score
##   <chr> <fct>  <int>      <int>
## 1 Eva   Female    24         91

71. Calculate the range of test scores for male and female students (assuming a gender column)

data %>% group_by(Gender) %>% summarize(Test_Score_Range = max(Test_Score) - min(Test_Score))
## # A tibble: 2 × 2
##   Gender Test_Score_Range
##   <fct>             <int>
## 1 Female               26
## 2 Male                 20

72. Find the student with the highest test score for each gender

data %>% group_by(Gender) %>% filter(Test_Score == max(Test_Score))
## # A tibble: 2 × 4
## # Groups:   Gender [2]
##   Name  Gender   Age Test_Score
##   <chr> <fct>  <int>      <int>
## 1 Eva   Female    24         91
## 2 Frank Male      22         88

73. Calculate the percentage of students who are 25 years old and scored below 75

data %>% filter(Age == 25, Test_Score < 75) %>% summarize(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
##   Percentage
##        <dbl>
## 1          0

74. Find the student with the highest test score in the age group of 23

data %>% filter(Age == 23) %>% arrange(desc(Test_Score)) %>% head(1)
## # A tibble: 1 × 4
##   Name  Gender   Age Test_Score
##   <chr> <fct>  <int>      <int>
## 1 Hank  Male      23         80

75. Calculate the average age of students who scored 75 or above

data %>% filter(Test_Score >= 75) %>% summarize(Avg_Age = mean(Age, na.rm = TRUE))
## # A tibble: 1 × 1
##   Avg_Age
##     <dbl>
## 1    22.2

76. Calculate the percentage of students who are 19 years old and scored below 70

data %>% filter(Age == 19, Test_Score < 70) %>% summarize(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
##   Percentage
##        <dbl>
## 1         10

77. Find the student with the highest test score for each unique age and gender combination

data %>% group_by(Age, Gender) %>% filter(Test_Score == max(Test_Score))
## # A tibble: 6 × 4
## # Groups:   Age, Gender [6]
##   Name  Gender   Age Test_Score
##   <chr> <fct>  <int>      <int>
## 1 David Female    18         86
## 2 Eva   Female    24         91
## 3 Frank Male      22         88
## 4 Hank  Male      23         80
## 5 Ivy   Male      19         68
## 6 Jack  Male      21         71

78. Calculate the average age of students who scored 80 or above

data %>% filter(Test_Score >= 80) %>% summarize(Avg_Age = mean(Age, na.rm = TRUE))
## # A tibble: 1 × 1
##   Avg_Age
##     <dbl>
## 1    21.8

79. Find the student with the highest age in the age group of 23

data %>% filter(Age == 23) %>% arrange(desc(Age)) %>% head(1)
## # A tibble: 1 × 4
##   Name  Gender   Age Test_Score
##   <chr> <fct>  <int>      <int>
## 1 Hank  Male      23         80

80. Calculate the percentage of students who are 22 years old and scored 60 or below

data %>% filter(Age == 22, Test_Score <= 60) %>% summarize(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
##   Percentage
##        <dbl>
## 1          0

81. Find the student with the lowest test score in the age group of 25

data %>% filter(Age == 25) %>% arrange(Test_Score) %>% head(1)
## # A tibble: 0 × 4
## # ℹ 4 variables: Name <chr>, Gender <fct>, Age <int>, Test_Score <int>

82. Calculate the average test score for students who are 24 years old or older

data %>% filter(Age >= 24) %>% summarize(Avg_Test_Score = mean(Test_Score, na.rm = TRUE))
## # A tibble: 1 × 1
##   Avg_Test_Score
##            <dbl>
## 1           79.7

83. Find the student with the highest age and the lowest test score in the age group of 20

data %>% filter(Age == 20) %>% arrange(desc(Age), Test_Score) %>% head(1)
## # A tibble: 0 × 4
## # ℹ 4 variables: Name <chr>, Gender <fct>, Age <int>, Test_Score <int>

84. Calculate the median test score for students who are 21 years old or younger

data %>% filter(Age <= 21) %>% summarize(Median_Test_Score = median(Test_Score))
## # A tibble: 1 × 1
##   Median_Test_Score
##               <dbl>
## 1              69.5

85. Find the student with the highest test score in the age group of 22

data %>% filter(Age == 22) %>% arrange(desc(Test_Score)) %>% head(1)
## # A tibble: 1 × 4
##   Name  Gender   Age Test_Score
##   <chr> <fct>  <int>      <int>
## 1 Frank Male      22         88

86. Calculate the percentage of students who are 23 years old and scored 70 or above

data %>% filter(Age == 23, Test_Score >= 70) %>% summarize(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
##   Percentage
##        <dbl>
## 1         10

87. Find the student with the lowest test score in the age group of 24

data %>% filter(Age == 24) %>% arrange(Test_Score) %>% head(1)
## # A tibble: 1 × 4
##   Name    Gender   Age Test_Score
##   <chr>   <fct>  <int>      <int>
## 1 Charlie Female    24         71

88. Calculate the average age of students who scored 90 or above

data %>% filter(Test_Score >= 90) %>% summarize(Avg_Age = mean(Age, na.rm = TRUE))
## # A tibble: 1 × 1
##   Avg_Age
##     <dbl>
## 1      24

89. Find the student with the highest test score for each unique age and gender combination

data %>% group_by(Age, Gender) %>% filter(Test_Score == max(Test_Score))
## # A tibble: 6 × 4
## # Groups:   Age, Gender [6]
##   Name  Gender   Age Test_Score
##   <chr> <fct>  <int>      <int>
## 1 David Female    18         86
## 2 Eva   Female    24         91
## 3 Frank Male      22         88
## 4 Hank  Male      23         80
## 5 Ivy   Male      19         68
## 6 Jack  Male      21         71

90. Calculate the range of ages for male and female students (assuming a gender column)

data %>% group_by(Gender) %>% summarize(Age_Range = max(Age) - min(Age))
## # A tibble: 2 × 2
##   Gender Age_Range
##   <fct>      <int>
## 1 Female         6
## 2 Male           4

91. Find the student with the lowest test score for each age group

data %>% group_by(Age) %>% filter(Test_Score == min(Test_Score))
## # A tibble: 6 × 4
## # Groups:   Age [6]
##   Name    Gender   Age Test_Score
##   <chr>   <fct>  <int>      <int>
## 1 Alice   Female    18         65
## 2 Bob     Male      22         78
## 3 Charlie Female    24         71
## 4 Hank    Male      23         80
## 5 Ivy     Male      19         68
## 6 Jack    Male      21         71

92. Calculate the percentage of students who are 24 years old and scored 80 or above

data %>% filter(Age == 24, Test_Score >= 80) %>% summarize(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
##   Percentage
##        <dbl>
## 1         10

93. Find the student with the highest test score in the age group of 21

data %>% filter(Age == 21) %>% arrange(desc(Test_Score)) %>% head(1)
## # A tibble: 1 × 4
##   Name  Gender   Age Test_Score
##   <chr> <fct>  <int>      <int>
## 1 Jack  Male      21         71

94. Calculate the average test score for students who are 25 years old or older

data %>% filter(Age >= 25) %>% summarize(Avg_Test_Score = mean(Test_Score, na.rm = TRUE))
## # A tibble: 1 × 1
##   Avg_Test_Score
##            <dbl>
## 1            NaN

95. Find the student with the lowest age in the age group of 23

data %>% filter(Age == 23) %>% arrange(Age) %>% head(1)
## # A tibble: 1 × 4
##   Name  Gender   Age Test_Score
##   <chr> <fct>  <int>      <int>
## 1 Hank  Male      23         80

96. Calculate the median test score for male and female students (assuming a gender column)

data %>% group_by(Gender) %>% summarize(Median_Test_Score = median(Test_Score, na.rm = TRUE))
## # A tibble: 2 × 2
##   Gender Median_Test_Score
##   <fct>              <int>
## 1 Female                77
## 2 Male                  78

97. Find the student with the lowest age and the lowest test score

data %>% arrange(Age, Test_Score) %>% head(1)
## # A tibble: 1 × 4
##   Name  Gender   Age Test_Score
##   <chr> <fct>  <int>      <int>
## 1 Alice Female    18         65

98. Calculate the percentage of students who are 20 years old and scored 70 or above

data %>% filter(Age == 20, Test_Score >= 70) %>% summarize(Percentage = n() / nrow(data) * 100)
## # A tibble: 1 × 1
##   Percentage
##        <dbl>
## 1          0

99. Find the student with the highest test score in the age group of 25

data %>% filter(Age == 25) %>% arrange(desc(Test_Score)) %>% head(1)
## # A tibble: 0 × 4
## # ℹ 4 variables: Name <chr>, Gender <fct>, Age <int>, Test_Score <int>

100. Calculate the average age of students who scored 60 or below

data %>% filter(Test_Score <= 60) %>% summarize(Avg_Age = mean(Age, na.rm = TRUE))
## # A tibble: 1 × 1
##   Avg_Age
##     <dbl>
## 1     NaN