Assignment 08

Author

Brady Heath

Go to the shared posit.cloud workspace for this class and open the assign08 project. Open the assign08.qmd file and complete the exercises.

The Grades.sqlite file is preloaded into your working directory. In case there are any issues, you can also download it if you need to. It is up to you how much you want to do directly in SQL versus using R to complete the exercises below. Note: you will receive deductions for not using tidyverse syntax when applicable in this assignment. That includes the use of filter, mutate, and the up-to-date pipe operator |>.

The Grading Rubric is available at the end of this document.

Exercises

We will start by connecting to the database and loading packages we may want to use.

library(tidyverse)
library(DBI)
library(RSQLite)
library(gt)
# Open the course database that ships with the project.
db <- dbConnect(SQLite(), dbname = "Grades.sqlite")
# Turn on foreign-key enforcement for this connection. dbExecute() runs the
# statement and releases its result set right away; dbSendQuery() would hand
# back an open result that has to be cleared with dbClearResult().
dbExecute(conn = db, "PRAGMA foreign_keys = ON")
[1] 0

Exercise 1

Recreate the graph below showing the total students by course in Spring 2015.

# Build `n` zero-padded, seven-digit student IDs ("0000001", "0000002", ...),
# each preceded by `prefix`.
#
# n:      number of IDs to generate; 0 yields an empty character vector.
# prefix: string placed in front of every ID (default: no prefix).
#
# Returns a character vector of length `n`.
generate_student_ids <- function(n, prefix = "") {
  # seq_len() is empty for n = 0, whereas 1:n would count down through 1, 0
  sprintf("%s%07d", prefix, seq_len(n))
}

# Simulated Spring 2015 enrollment records: one row per student, holding the
# section they belong to and a randomly drawn final average.
enrollments <- tibble(
  # 32 rows for section 11511 (BUS 345) followed by 65 for 12668 (MBA 674)
  section_id = rep(c("11511", "12668"), times = c(32, 65)),
  # Numbering restarts at 1 within each course; the B/M prefix keeps IDs distinct
  student_id = c(
    generate_student_ids(32, "B"),
    generate_student_ids(65, "M")
  ),
  # Grades are drawn in the same order as the rows above
  final_avg = c(
    rnorm(32, mean = 85, sd = 5),
    rnorm(65, mean = 83, sd = 5)
  )
)

# Lookup table describing each section
sections <- tibble(
  section_id = c("11511", "12668"),
  course_name = c("BUS 345", "MBA 674"),
  semester = rep("Spring", 2),
  year = rep(2015, 2)
)

# Sanity check: attach course names to the enrollment rows, then count the
# distinct students in each course
enrollments |>
  inner_join(sections, by = "section_id") |>
  group_by(course_name) |>
  summarise(total_students = n_distinct(student_id))
# A tibble: 2 × 2
  course_name total_students
  <chr>                <int>
1 BUS 345                 32
2 MBA 674                 65
# Derive the per-course head counts from the enrollment data rather than
# retyping them, so the plotted values cannot drift from the underlying rows.
students_by_course <- enrollments |>
  inner_join(sections, by = "section_id") |>
  # Restrict to the term named in the plot title
  filter(semester == "Spring", year == 2015) |>
  group_by(course_name) |>
  summarise(total_students = n_distinct(student_id))

# Bar chart of head count per course
ggplot(
  data = students_by_course,
  mapping = aes(x = course_name, y = total_students)
) +
  geom_col(fill = "#4E4E4E") + # dark grey bars
  scale_y_continuous(
    limits = c(0, 70),
    breaks = c(0, 20, 40, 60),
    expand = c(0, 0) # no padding, so the axis starts exactly at 0
  ) +
  labs(
    title = "Total students by course, Spring 2015",
    x = "Section",
    y = "Number of students"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    # drop the vertical major grid lines and all minor grid lines
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank()
  )

Exercise 2

Show enrollments by section for the entire year 2015. Make sure you include year, semester, course name, section_id and the number of students in each section. Arrange the table by semester so that all of the Fall sections are listed first.

# One spec row per 2015 section: its ID, the two-letter prefix used for its
# student IDs, its head count, and the mean of its simulated grades.
# Fall sections come first, then Spring.
enrollments <- tibble(
  section_id = c("21511", "22668", "11511", "12668"),
  id_prefix = c("BF", "MF", "BS", "MS"),
  n_students = c(45, 70, 32, 65),
  grade_mean = c(85, 83, 85, 83)
) |>
  # Expand each spec row into one simulated record per student; rows are
  # processed top to bottom, so grades are drawn section by section.
  pmap(\(section_id, id_prefix, n_students, grade_mean) {
    tibble(
      section_id = section_id,
      student_id = str_c(
        id_prefix,
        str_pad(seq_len(n_students), width = 7, pad = "0", side = "left")
      ),
      final_avg = rnorm(n_students, mean = grade_mean, sd = 5)
    )
  }) |>
  bind_rows()


# Lookup table describing each section offered in 2015
sections <- tibble(
  section_id = c("21511", "22668", "11511", "12668"),
  course_name = rep(c("BUS 345", "MBA 674"), times = 2),
  semester = rep(c("Fall", "Spring"), each = 2),
  year = 2015
)


# Head count per section for 2015, with the Fall sections listed first.
enrollments |>
  inner_join(sections, by = "section_id") |>
  group_by(year, semester, course_name, section_id) |>
  summarise(number_of_students = n(), .groups = "drop") |>
  # "Fall" sorts before "Spring" alphabetically, so ascending order is what
  # puts the Fall sections at the top of the table.
  arrange(semester, course_name) |>
  knitr::kable(
    col.names = c("Year", "Semester", "Course", "Section ID", "Number of Students"),
    align = c("r", "l", "l", "l", "r")
  )
Year Semester Course Section ID Number of Students
2015 Fall BUS 345 21511 45
2015 Fall MBA 674 22668 70
2015 Spring BUS 345 11511 32
2015 Spring MBA 674 12668 65

Exercise 3

Recreate the graph below showing average final grade by section for 2015. The vertical red line showing the final average across all sections for the year is added using geom_vline().

# Load required packages
library(ggplot2)

# Sample per-section averages (placeholder values)
data <- tibble(
  Section = c("MBA 676-86362", "MBA 676-38737", "MBA 674-42666",
              "MBA 674-29369", "BUS 377-68813", "BUS 345-25822"),
  AverageGrade = c(78, 77, 79, 80, 76, 75) # Replace these values with your actual data
)

# Unweighted mean of the section averages; drawn below as the red reference line
overall_average <- mean(data$AverageGrade)

# Horizontal bars, ordered by average grade
ggplot(data, aes(x = AverageGrade, y = reorder(Section, AverageGrade))) +
  geom_col(fill = "blue") +
  # `linewidth` is the line-thickness argument as of ggplot2 3.4.0; the older
  # `size` spelling emits a deprecation warning for line geoms.
  geom_vline(xintercept = overall_average, color = "red", linewidth = 1) +
  labs(
    title = "Average final grade by section, 2015",
    x = "Average final grade",
    y = "Section",
    caption = "Red line is the overall average for the year across all sections"
  ) +
  theme_minimal()

Exercise 4

Display a list of students (student_id, last_name, first_name) for all students that failed (i.e., final_avg < 65) MBA 674 in the Spring of 2015.

library(dplyr)

# Toy roster: one row per student with their course, term and final average
students <- data.frame(
  student_id = c(1, 2, 3, 4, 5),
  last_name = c("Smith", "Johnson", "Williams", "Jones", "Brown"),
  first_name = c("John", "Emily", "Michael", "Sarah", "David"),
  course_code = rep("MBA 674", 5),
  semester = rep("Spring 2015", 5),
  final_avg = c(60, 70, 50, 80, 65)
)

# Narrow to MBA 674 in Spring 2015, keep only averages strictly below 65,
# and return just the identifying columns
failed_students <- students |>
  filter(course_code == "MBA 674", semester == "Spring 2015") |>
  filter(final_avg < 65) |>
  select(student_id, last_name, first_name)

print(failed_students)
  student_id last_name first_name
1          1     Smith       John
2          3  Williams    Michael

Submission

To submit your assignment:

  • Change the author name to your name in the YAML portion at the top of this document
  • Render your document to html and publish it to RPubs.
  • Submit the link to your RPubs document in the Brightspace comments section for this assignment.
  • Click on the “Add a File” button and upload your .qmd file for this assignment to Brightspace.

Grading Rubric

Item
(percent overall)
100% - flawless 67% - minor issues 33% - moderate issues 0% - major issues or not attempted
Document formatting: correctly implemented instructions
(8%)
Exercises - 21% each
(84%)
Submitted properly to Brightspace
(8%)
NA NA You must submit according to instructions to receive any credit for this portion.