library(RSQLite)
## Loading required package: DBI
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
db <- dbConnect(SQLite(), dbname = "Grades.sqlite")
dbListTables(db)
## [1] "grades" "sections" "students"
dbListFields(db, "grades")
## [1] "student_id" "section_id" "final_avg" "comments"
students <- dbGetQuery(conn = db,
"SELECT * FROM students")
sections <- dbGetQuery(conn = db,
"SELECT * FROM sections")
grades <- dbGetQuery(conn = db,
"SELECT * FROM grades")
head(students, 1)
## student_id last_name first_name
## 1 6161326 Harrison Michelle
head(sections, 1)
## section_id name semester year
## 1 00638 MBA 676 Fall 2014
head(grades, 1)
## student_id section_id final_avg comments
## 1 6161326 71114 67.85 didn't get it and was annoying
What is the mean final_avg?
dbGetQuery(conn = db,
"SELECT AVG(final_avg) FROM grades")
## AVG(final_avg)
## 1 79.24869
Mean final_avg in dplyr
grades %>% summarize(mean(final_avg), na.rm = TRUE)
## mean(final_avg) na.rm
## 1 NA TRUE
Highest final_avg Fall 2016
dbGetQuery(conn = db, "
SELECT MAX(final_avg)
FROM grades
INNER JOIN sections
USING (section_id)
WHERE semester = 'Fall' AND year = '2016'")
## MAX(final_avg)
## 1 98.4
Highest final_avg using dplyr
grades %>% inner_join(sections, grades, by = "section_id") %>% filter(semester =='Fall' & year == '2016') %>% summarize(average = max(final_avg))
## average
## 1 98.4
Which sections have an average final grade that is greater than 80?
dbGetQuery(conn = db,
"SELECT name, AVG(final_avg)
FROM grades
INNER JOIN sections
USING (section_id)
GROUP BY name
HAVING AVG(final_avg) > 80")
## name AVG(final_avg)
## 1 BUS 345 80.14684
## 2 BUS 377 80.41976
ddb <- src_sqlite("Grades.sqlite", create = FALSE)
tbl(ddb, "grades") %>% summarize(mean_grade = mean(final_avg))
## Source: query [?? x 1]
## Database: sqlite 3.8.6 [Grades.sqlite]
##
## mean_grade
## <dbl>
## 1 79.24869
What was the highest final_avg in Fall 2016?
tbl(ddb, "grades") %>% inner_join(tbl(ddb,"sections"), by = "section_id") %>% filter(semester == 'Fall' & year == '2016') %>% summarize(highestgrade = max(final_avg))
## Source: query [?? x 1]
## Database: sqlite 3.8.6 [Grades.sqlite]
##
## highestgrade
## <dbl>
## 1 98.4
What sections have an average grade that is higher than 80?
tbl(ddb, "grades") %>% inner_join(tbl(ddb, "sections"), by = "section_id") %>% group_by(name) %>% summarize(average = mean(final_avg)) %>% filter(average > 80)
## Source: query [?? x 2]
## Database: sqlite 3.8.6 [Grades.sqlite]
##
## name average
## <chr> <dbl>
## 1 BUS 345 80.14684
## 2 BUS 377 80.41976
tbl(ddb, "grades") %>% inner_join(tbl(ddb, "sections"), by = "section_id") %>% filter(year == '2016' & semester =='Fall') %>% summarize(highest_grade = max(final_avg)) %>% explain()
## <SQL>
## SELECT MAX(`final_avg`) AS `highest_grade`
## FROM (SELECT *
## FROM (SELECT * FROM `grades`
##
## INNER JOIN
##
## `sections`
##
## USING (`section_id`))
## WHERE (`year` = '2016' AND `semester` = 'Fall'))
##
## <PLAN>
## addr opcode p1 p2 p3 p4 p5 comment
## 1 0 Init 0 28 0 00 <NA>
## 2 1 Null 0 1 2 00 <NA>
## 3 2 OpenRead 2 6 0 4 00 <NA>
## 4 3 OpenRead 3 4 0 4 00 <NA>
## 5 4 OpenRead 4 5 0 k(2,nil,nil) 00 <NA>
## 6 5 Rewind 2 21 0 00 <NA>
## 7 6 Column 2 1 3 00 <NA>
## 8 7 IsNull 3 20 0 00 <NA>
## 9 8 SeekGE 4 20 3 1 00 <NA>
## 10 9 IdxGT 4 20 3 1 00 <NA>
## 11 10 IdxRowid 4 4 0 00 <NA>
## 12 11 Seek 3 4 0 00 <NA>
## 13 12 Column 3 3 5 00 <NA>
## 14 13 Ne 6 20 5 (BINARY) 69 <NA>
## 15 14 Column 3 2 7 00 <NA>
## 16 15 Ne 8 20 7 (BINARY) 69 <NA>
## 17 16 Column 2 2 9 00 <NA>
## 18 17 RealAffinity 9 0 0 00 <NA>
## 19 18 CollSeq 0 0 0 (BINARY) 00 <NA>
## 20 19 AggStep 0 9 1 max(1) 01 <NA>
## 21 20 Next 2 6 0 01 <NA>
## 22 21 Close 2 0 0 00 <NA>
## 23 22 Close 3 0 0 00 <NA>
## 24 23 Close 4 0 0 00 <NA>
## 25 24 AggFinal 1 1 0 max(1) 00 <NA>
## 26 25 Copy 1 10 0 00 <NA>
## 27 26 ResultRow 10 1 0 00 <NA>
## 28 27 Halt 0 0 0 00 <NA>
## 29 28 Transaction 0 0 51 0 01 <NA>
## 30 29 TableLock 0 6 0 grades 00 <NA>
## 31 30 TableLock 0 4 0 sections 00 <NA>
## 32 31 String8 0 6 0 2016 00 <NA>
## 33 32 String8 0 8 0 Fall 00 <NA>
## 34 33 Goto 0 1 0 00 <NA>