Assignment 08

Author

Constance Nahimana

Open the assign08.qmd file and complete the exercises.

The Grades.sqlite file is preloaded into your working directory. In case there are any issues, you can also download it if you need to. It is up to you how much you want to do directly in SQL versus using R to complete the exercises below. Note: you will receive deductions for not using tidyverse syntax when applicable in this assignment. That includes the use of filter, mutate, and the up-to-date pipe operator |>.

The Grading Rubric is available at the end of this document.

Exercises

We will start by connecting to the database and loading packages we may want to use.

library(tidyverse)
library(DBI)
library(RSQLite)
Warning: package 'RSQLite' was built under R version 4.4.3
library(gt)
# Open one connection to the course database; later chunks reuse `db`.
db <- dbConnect(SQLite(), dbname = "Grades.sqlite")
# Turn on foreign-key enforcement for this connection. dbExecute() runs the
# statement and releases its result immediately, whereas dbSendQuery() would
# leave an open result set that is never cleared.
dbExecute(conn = db, "PRAGMA foreign_keys = ON")
[1] 0

Exercise 1

Recreate the graph below showing the total students by course in Spring 2015.

library(tidyverse)
library(DBI)
library(RSQLite)

# Reuse the connection `db` opened in the setup chunk. Calling dbConnect()
# again here would open a second connection and leave the first one open.

# Create references to the database tables used in this exercise
grades <- tbl(db, "grades")
sections <- tbl(db, "sections")

# Check which fields are available in sections
print(colnames(sections))  # should show "semester" and "year"
[1] "section_id" "name"       "semester"   "year"      
# Pull the distinct course / semester / year combinations into R so the exact
# values stored in each column can be inspected before filtering on them
sample_data <- sections |>
  distinct(name, semester, year) |>
  collect()

print(sample_data)
# A tibble: 12 × 3
   name    semester year 
   <chr>   <chr>    <chr>
 1 MBA 676 Fall     2014 
 2 MBA 674 Spring   2015 
 3 BUS 377 Fall     2016 
 4 BUS 345 Spring   2014 
 5 MBA 676 Fall     2015 
 6 MBA 674 Spring   2016 
 7 BUS 377 Fall     2014 
 8 BUS 345 Spring   2015 
 9 MBA 676 Fall     2016 
10 MBA 674 Spring   2014 
11 BUS 377 Fall     2015 
12 BUS 345 Spring   2016 
# Reuse `db`, `grades` and `sections` created earlier in this exercise rather
# than opening another connection that would never be closed.

# Count the rows in grades per course for Spring 2015 (presumably one row per
# enrolled student). The aggregation runs in the database and only the summary
# rows are collected. No course names are hard-coded: the semester/year filter
# alone selects the Spring 2015 courses (BUS 345 and MBA 674 in the preview).
course_summary <- grades |>
  inner_join(sections, by = "section_id") |>
  filter(semester == "Spring", year == "2015") |>  # year is stored as text
  group_by(name) |>
  summarize(TotalStudents = n(), .groups = "drop") |>
  collect()

# Bar chart with one bar per course
ggplot(course_summary, aes(x = name, y = TotalStudents)) +
  geom_col(fill = "gray30") +
  labs(
    title = "Total students by course, Spring 2015",
    x = "Section",
    y = "Number of students"
  ) +
  theme_minimal()

Exercise 2

Show enrollments by section for the entire year 2015. Make sure you include year, semester, course name, section_id and the number of students in each section. Arrange the table by semester so that all of the Fall sections are listed first.

library(tidyverse)
library(DBI)
library(RSQLite)

# Reuse the connection `db` opened in the setup chunk instead of reconnecting.
grades <- tbl(db, "grades")
sections <- tbl(db, "sections")

# Count students per section for 2015. The join, filter and count run in the
# database; the small summary is then collected so the semester can be turned
# into an ordered factor for sorting.
enrollments_2015 <- grades |>
  inner_join(sections, by = "section_id") |>
  filter(year == "2015") |>  # year is stored as text, so compare to a string
  group_by(year, semester, name, section_id) |>
  summarize(num_students = n(), .groups = "drop") |>
  collect() |>
  mutate(
    # Explicit level order guarantees Fall sections are listed first
    semester = factor(semester, levels = c("Fall", "Spring", "Summer"))
  ) |>
  # section_id is a tie-breaker so the row order is fully determined
  arrange(semester, name, section_id)

# View() is an interactive-only data viewer and is omitted here; print() is
# what shows the table in the rendered document.
print(enrollments_2015)
# A tibble: 6 × 5
  year  semester name    section_id num_students
  <chr> <fct>    <chr>   <chr>             <int>
1 2015  Fall     BUS 377 68813                36
2 2015  Fall     MBA 676 38737                33
3 2015  Fall     MBA 676 86362                39
4 2015  Spring   BUS 345 25822                31
5 2015  Spring   MBA 674 29369                24
6 2015  Spring   MBA 674 42666                40
# Save the 2015 enrollment table to a CSV file in the working directory
write_csv(enrollments_2015, "enrollments_by_section_2015.csv")

Exercise 3

Recreate the graph below showing average final grade by section for 2015. The vertical red line showing the final average across all sections for the year is added using geom_vline().

#Load tables, prepare data, Plot:

library(tidyverse)
library(DBI)
library(RSQLite)

# Reuse the connection `db` opened in the setup chunk instead of reconnecting.
grades <- tbl(db, "grades")
sections <- tbl(db, "sections")

# Collect every 2015 grade row together with its section details, and build a
# "course-section_id" label that identifies each section on the plot
section_data <- grades |>
  inner_join(sections, by = "section_id") |>
  filter(year == "2015") |>
  collect() |>
  mutate(section_label = paste(name, section_id, sep = "-"))

# Average final grade per section
avg_final_by_section <- section_data |>
  group_by(section_label) |>
  summarize(avg_final = mean(final_avg, na.rm = TRUE), .groups = "drop")

# Make BUS 345-25822 the first factor level, keeping the other labels in their
# existing order.
# NOTE(review): ggplot2 draws the first level at the BOTTOM of a discrete
# y axis; if the reference graph shows this section at the top, reverse the
# levels (e.g. with forcats::fct_rev()).
avg_final_by_section <- avg_final_by_section |>
  mutate(section_label = factor(section_label, levels = c(
    "BUS 345-25822",
    setdiff(section_label, "BUS 345-25822")
  )))

# Overall average shown as the vertical reference line.
# NOTE(review): this is the unweighted mean of the section averages. If the
# reference line is the mean over all individual student grades, use
# mean(section_data$final_avg, na.rm = TRUE) instead.
overall_avg <- mean(avg_final_by_section$avg_final, na.rm = TRUE)

# Horizontal bar chart with the overall average marked in red
ggplot(avg_final_by_section, aes(x = avg_final, y = section_label)) +
  geom_col(fill = "blue") +
  geom_vline(xintercept = overall_avg, color = "red", linewidth = 1) +
  labs(
    title = "Average final grade by section, 2015",
    x = "Average final grade",
    y = "Section",
    caption = "Red line is the overall average for the year across all sections"
  ) +
  theme_minimal()

Exercise 4

Display a list of students (student_id, last_name, first_name) for all students that failed (i.e., final_avg < 65) MBA 674 in the Spring of 2015.

#Connect & load tables &Filter students who failed MBA 674 (Spring 2015)

library(tidyverse)
library(DBI)
library(RSQLite)

# Reuse the connection `db` opened in the setup chunk instead of reconnecting.
grades <- tbl(db, "grades")
sections <- tbl(db, "sections")
students <- tbl(db, "students")

# Students who failed (final_avg < 65) MBA 674 in Spring 2015.
# grades links each student to a section; sections supplies the course name,
# semester and year; students supplies the names. The whole query runs in the
# database and only the matching rows are collected.
mba674_fails <- grades |>
  inner_join(sections, by = "section_id") |>
  inner_join(students, by = "student_id") |>
  filter(
    final_avg < 65,
    semester == "Spring",
    year == "2015",  # year is stored as text
    name == "MBA 674"
  ) |>
  select(student_id, last_name, first_name) |>
  arrange(last_name, first_name) |>
  collect()

# Show the result
print(mba674_fails)
# A tibble: 5 × 3
  student_id last_name first_name
  <chr>      <chr>     <chr>     
1 7197441    Brierley  Sergio    
2 7237806    Fletcher  Vicky     
3 9553576    Garcia    Daniel    
4 7352157    Gonzales  Kyrie     
5 6106351    Middleton Sheridan  

Five students failed MBA 674 in Spring 2015.

Submission

To submit your assignment:

  • Change the author name to your name in the YAML portion at the top of this document
  • Render your document to html and publish it to RPubs.
  • Submit the link to your RPubs document in the Brightspace comments section for this assignment.
  • Click on the “Add a File” button and upload your .qmd file for this assignment to Brightspace.

Grading Rubric

Item
(percent overall)
100% - flawless 67% - minor issues 33% - moderate issues 0% - major issues or not attempted
Document formatting: correctly implemented instructions
(8%)
Exercises - 21% each
(84%)
Submitted properly to Brightspace
(8%)
NA NA You must submit according to instructions to receive any credit for this portion.