Open the assign08.qmd file and complete the exercises.
The Grades.sqlite file is preloaded into your working directory. In case there are any issues, you can also download it if you need to. It is up to you how much you want to do directly in SQL versus using R to complete the exercises below. Note: you will receive deductions for not using tidyverse syntax when applicable in this assignment. That includes the use of filter, mutate, and the up-to-date pipe operator |>.
The Grading Rubric is available at the end of this document.
Exercises
We will start by connecting to the database and loading the packages we may want to use.
library(tidyverse)library(DBI)library(RSQLite)
Warning: package 'RSQLite' was built under R version 4.4.3
Recreate the graph below showing the total students by course in Spring 2015.
# Setup ----
# Attach the packages used throughout the document.
library(tidyverse)
library(DBI)
library(RSQLite)

# Open a connection to the SQLite grade book in the working directory.
db <- dbConnect(SQLite(), dbname = "Grades.sqlite")

# Lazy references to the database tables.
grades <- tbl(db, "grades")
sections <- tbl(db, "sections")

# List the fields available in `sections`; "semester" and "year" are the
# ones needed for the term filters in the exercises.
sections |>
  colnames() |>
  print()
[1] "section_id" "name" "semester" "year"
# Step 1: preview the distinct course / semester / year combinations so the
# exact values stored in `semester` and `year` are known before filtering.
sample_data <- sections |>
  distinct(name, semester, year) |>
  collect()

print(sample_data)
# A tibble: 12 × 3
name semester year
<chr> <chr> <chr>
1 MBA 676 Fall 2014
2 MBA 674 Spring 2015
3 BUS 377 Fall 2016
4 BUS 345 Spring 2014
5 MBA 676 Fall 2015
6 MBA 674 Spring 2016
7 BUS 377 Fall 2014
8 BUS 345 Spring 2015
9 MBA 676 Fall 2016
10 MBA 674 Spring 2014
11 BUS 377 Fall 2015
12 BUS 345 Spring 2016
# Exercise 1: total students by course, Spring 2015 ----
# Reuses the `grades` and `sections` lazy tables created in the setup chunk.
# Opening another connection here would leave the earlier one unclosed.

# Count enrolled students per course inside the database; only the small
# summary table is collected into R.
course_summary <- grades |>
  inner_join(sections, by = "section_id") |>
  # `year` is stored as text in `sections`, so compare against a string.
  # No course names are hard-coded: every course offered in Spring 2015 is
  # counted (in the preview above these are BUS 345 and MBA 674).
  filter(semester == "Spring", year == "2015") |>
  group_by(name) |>
  summarize(TotalStudents = n(), .groups = "drop") |>
  collect()

# Bar chart with one bar per course.
# NOTE(review): the x axis shows course names but is labelled "Section";
# confirm against the reference graph before changing the label.
ggplot(course_summary, aes(x = name, y = TotalStudents)) +
  geom_col(fill = "gray30") +
  labs(
    title = "Total students by course, Spring 2015",
    x = "Section",
    y = "Number of students"
  ) +
  theme_minimal()
Exercise 2
Show enrollments by section for the entire year 2015. Make sure you include year, semester, course name, section_id and the number of students in each section. Arrange the table by semester so that all of the Fall sections are listed first.
# Exercise 2: enrollment by section for 2015 ----
# Reuses the `grades` and `sections` lazy tables created in the setup chunk.
# Opening another connection here would leave the earlier one unclosed.

enrollments_2015 <- grades |>
  inner_join(sections, by = "section_id") |>
  # `year` is stored as text, so a single string comparison is sufficient;
  # the former numeric alternative (`year == 2015`) was redundant.
  filter(year == "2015") |>
  group_by(year, semester, name, section_id) |>
  summarize(num_students = n(), .groups = "drop") |>
  collect() |>
  # An explicit level order makes Fall sort ahead of Spring.
  mutate(
    semester = factor(semester, levels = c("Fall", "Spring", "Summer"))
  ) |>
  # `section_id` breaks ties so the row order is deterministic.
  arrange(semester, name, section_id)

# Print the table; the interactive View() call was dropped because it has
# no place in a rendered document.
print(enrollments_2015)
# A tibble: 6 × 5
year semester name section_id num_students
<chr> <fct> <chr> <chr> <int>
1 2015 Fall BUS 377 68813 36
2 2015 Fall MBA 676 38737 33
3 2015 Fall MBA 676 86362 39
4 2015 Spring BUS 345 25822 31
5 2015 Spring MBA 674 29369 24
6 2015 Spring MBA 674 42666 40
Recreate the graph below showing average final grade by section for 2015. The vertical red line showing the final average across all sections for the year is added using geom_vline().
Load tables, prepare data, and plot:
# Exercise 3: average final grade by section, 2015 ----
# Reuses the `grades` and `sections` lazy tables created in the setup chunk.
# Opening another connection here would leave the earlier one unclosed.

# Pull only the columns needed for the 2015 sections.
section_data <- grades |>
  inner_join(sections, by = "section_id") |>
  filter(year == "2015") |>
  select(name, section_id, final_avg) |>
  collect() |>
  # Label each section as "<course>-<section_id>".
  mutate(section_label = paste(name, section_id, sep = "-"))

# Average final grade per section, sorted by label.
avg_final_by_section <- section_data |>
  group_by(section_label) |>
  summarize(avg_final = mean(final_avg, na.rm = TRUE), .groups = "drop") |>
  arrange(section_label)

# Put BUS 345-25822 at the TOP of the chart. ggplot2 draws the first factor
# level at the bottom of a discrete y axis, so the intended top-to-bottom
# order has to be reversed when it is turned into factor levels.
top_to_bottom <- union("BUS 345-25822", avg_final_by_section$section_label)
avg_final_by_section <- avg_final_by_section |>
  mutate(section_label = factor(section_label, levels = rev(top_to_bottom)))

# Overall average shown as the red reference line.
# NOTE(review): this is the unweighted mean of the section averages, not the
# mean over all students; confirm which one the reference graph uses.
overall_avg <- mean(avg_final_by_section$avg_final, na.rm = TRUE)

# Horizontal bar chart with the overall average as a vertical line.
ggplot(avg_final_by_section, aes(x = avg_final, y = section_label)) +
  geom_col(fill = "blue") +
  geom_vline(xintercept = overall_avg, color = "red", linewidth = 1) +
  labs(
    title = "Average final grade by section, 2015",
    x = "Average final grade",
    y = "Section",
    caption = "Red line is the overall average for the year across all sections"
  ) +
  theme_minimal()
Exercise 4
Display a list of students (student_id, last_name, first_name) for all students that failed (i.e., final_avg < 65) MBA 674 in the Spring of 2015.
Query the database for students who failed MBA 674 (Spring 2015):
# Exercise 4: students who failed MBA 674 in Spring 2015 ----
# Reuses the `db` connection and the `grades` / `sections` lazy tables from
# the setup chunk; only the `students` table still needs to be referenced.
students <- tbl(db, "students")

mba674_fails <- grades |>
  inner_join(sections, by = "section_id") |>
  # Narrow to the failing MBA 674 grades first so that only those rows are
  # matched against `students`. `year` is stored as text.
  filter(
    name == "MBA 674",
    semester == "Spring",
    year == "2015",
    final_avg < 65
  ) |>
  inner_join(students, by = "student_id") |>
  select(student_id, last_name, first_name) |>
  arrange(last_name, first_name) |>
  collect()

# Show the result.
print(mba674_fails)