library(RSQLite)
## Loading required package: DBI
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
db <- dbConnect(SQLite(), dbname = "Grades.sqlite")
dbListTables(db)
## [1] "grades"   "sections" "students"
dbListFields(db, "grades")
## [1] "student_id" "section_id" "final_avg"  "comments"
students <- dbGetQuery(conn = db, 
                       "SELECT * FROM students")
sections <- dbGetQuery(conn = db, 
                       "SELECT * FROM sections")
grades <- dbGetQuery(conn = db, 
                       "SELECT * FROM grades")
head(students, 1)
##   student_id last_name first_name
## 1    6161326  Harrison   Michelle
head(sections, 1)
##   section_id    name semester year
## 1      00638 MBA 676     Fall 2014
head(grades, 1)
##   student_id section_id final_avg                       comments
## 1    6161326      71114     67.85 didn't get it and was annoying

What is the mean final_avg?

dbGetQuery(conn = db, 
           "SELECT AVG(final_avg) FROM grades")
##   AVG(final_avg)
## 1       79.24869

Mean final_avg in dplyr

grades %>% summarize(mean(final_avg), na.rm = TRUE)
##   mean(final_avg) na.rm
## 1              NA  TRUE

Highest final_avg Fall 2016

dbGetQuery(conn = db, "
           SELECT MAX(final_avg)
           FROM grades
           INNER JOIN sections
           USING (section_id)
           WHERE semester = 'Fall' AND year = '2016'")
##   MAX(final_avg)
## 1           98.4

Highest final_avg using dplyr

grades %>% inner_join(sections, grades, by = "section_id") %>% filter(semester =='Fall' & year == '2016') %>% summarize(average = max(final_avg))
##   average
## 1    98.4

Which sections have an average final grade that is greater than 80?

dbGetQuery(conn = db, 
           "SELECT name, AVG(final_avg)
           FROM grades
           INNER JOIN sections
           USING (section_id)
           GROUP BY name
           HAVING AVG(final_avg) > 80")
##      name AVG(final_avg)
## 1 BUS 345       80.14684
## 2 BUS 377       80.41976
ddb <- src_sqlite("Grades.sqlite", create = FALSE)
tbl(ddb, "grades") %>% summarize(mean_grade = mean(final_avg))
## Source:   query [?? x 1]
## Database: sqlite 3.8.6 [Grades.sqlite]
## 
##   mean_grade
##        <dbl>
## 1   79.24869

What was the highest final_avg in Fall 2016?

tbl(ddb, "grades") %>% inner_join(tbl(ddb,"sections"), by = "section_id") %>% filter(semester == 'Fall' & year == '2016') %>% summarize(highestgrade = max(final_avg))
## Source:   query [?? x 1]
## Database: sqlite 3.8.6 [Grades.sqlite]
## 
##   highestgrade
##          <dbl>
## 1         98.4

What sections have an average grade that is higher than 80?

tbl(ddb, "grades") %>% inner_join(tbl(ddb, "sections"), by = "section_id") %>% group_by(name) %>% summarize(average = mean(final_avg)) %>% filter(average > 80)
## Source:   query [?? x 2]
## Database: sqlite 3.8.6 [Grades.sqlite]
## 
##      name  average
##     <chr>    <dbl>
## 1 BUS 345 80.14684
## 2 BUS 377 80.41976
tbl(ddb, "grades") %>% inner_join(tbl(ddb, "sections"), by = "section_id") %>% filter(year == '2016' & semester =='Fall') %>% summarize(highest_grade = max(final_avg)) %>% explain()
## <SQL>
## SELECT MAX(`final_avg`) AS `highest_grade`
## FROM (SELECT *
## FROM (SELECT * FROM `grades`
## 
## INNER JOIN
## 
## `sections`
## 
## USING (`section_id`))
## WHERE (`year` = '2016' AND `semester` = 'Fall'))
## 
## <PLAN>
##    addr       opcode p1 p2 p3           p4 p5 comment
## 1     0         Init  0 28  0              00    <NA>
## 2     1         Null  0  1  2              00    <NA>
## 3     2     OpenRead  2  6  0            4 00    <NA>
## 4     3     OpenRead  3  4  0            4 00    <NA>
## 5     4     OpenRead  4  5  0 k(2,nil,nil) 00    <NA>
## 6     5       Rewind  2 21  0              00    <NA>
## 7     6       Column  2  1  3              00    <NA>
## 8     7       IsNull  3 20  0              00    <NA>
## 9     8       SeekGE  4 20  3            1 00    <NA>
## 10    9        IdxGT  4 20  3            1 00    <NA>
## 11   10     IdxRowid  4  4  0              00    <NA>
## 12   11         Seek  3  4  0              00    <NA>
## 13   12       Column  3  3  5              00    <NA>
## 14   13           Ne  6 20  5     (BINARY) 69    <NA>
## 15   14       Column  3  2  7              00    <NA>
## 16   15           Ne  8 20  7     (BINARY) 69    <NA>
## 17   16       Column  2  2  9              00    <NA>
## 18   17 RealAffinity  9  0  0              00    <NA>
## 19   18      CollSeq  0  0  0     (BINARY) 00    <NA>
## 20   19      AggStep  0  9  1       max(1) 01    <NA>
## 21   20         Next  2  6  0              01    <NA>
## 22   21        Close  2  0  0              00    <NA>
## 23   22        Close  3  0  0              00    <NA>
## 24   23        Close  4  0  0              00    <NA>
## 25   24     AggFinal  1  1  0       max(1) 00    <NA>
## 26   25         Copy  1 10  0              00    <NA>
## 27   26    ResultRow 10  1  0              00    <NA>
## 28   27         Halt  0  0  0              00    <NA>
## 29   28  Transaction  0  0 51            0 01    <NA>
## 30   29    TableLock  0  6  0       grades 00    <NA>
## 31   30    TableLock  0  4  0     sections 00    <NA>
## 32   31      String8  0  6  0         2016 00    <NA>
## 33   32      String8  0  8  0         Fall 00    <NA>
## 34   33         Goto  0  1  0              00    <NA>