Question 1

a) See below!

b) See below!

cond <- dbConnect(RSQLite::SQLite(), ":memory:")

penguins <- read_excel("penguins.xls")

dbWriteTable(cond, "penguins", penguins)

tbl(cond, "penguins") %>% filter(sex == "male", species == "Adelie") %>% select(bill_length_mm, bill_depth_mm) %>% head(5)
## # Source:   SQL [?? x 2]
## # Database: sqlite 3.50.4 [:memory:]
##   bill_length_mm bill_depth_mm
##            <dbl>         <dbl>
## 1           39.1          18.7
## 2           39.3          20.6
## 3           39.2          19.6
## 4           38.6          21.2
## 5           34.6          21.1
tbl(cond, "penguins") %>% group_by(sex, species) %>%summarise(mean_mass = mean(body_mass_g, na.rm = TRUE), .groups = "drop") %>% print()
## # Source:   SQL [?? x 3]
## # Database: sqlite 3.50.4 [:memory:]
##   sex    species   mean_mass
##   <chr>  <chr>         <dbl>
## 1 <NA>   Adelie        3540 
## 2 <NA>   Gentoo        4588.
## 3 female Adelie        3369.
## 4 female Chinstrap     3527.
## 5 female Gentoo        4680.
## 6 male   Adelie        4043.
## 7 male   Chinstrap     3939.
## 8 male   Gentoo        5485.
penguins %>% group_by(sex, species) %>% summarise(mean_mass = mean(body_mass_g, na.rm = TRUE), .groups = "drop") %>% print()
## # A tibble: 8 × 3
##   sex    species   mean_mass
##   <chr>  <chr>         <dbl>
## 1 female Adelie        3369.
## 2 female Chinstrap     3527.
## 3 female Gentoo        4680.
## 4 male   Adelie        4043.
## 5 male   Chinstrap     3939.
## 6 male   Gentoo        5485.
## 7 <NA>   Adelie        3540 
## 8 <NA>   Gentoo        4588.
dbDisconnect(cond)

Question 2

a) See below!

b) See below!

cond <- dbConnect(RSQLite::SQLite(), ":memory:")
dbWriteTable(cond, "flights", flights)
dbWriteTable(cond, "planes", planes)

tbl(cond, "flights") %>% inner_join(tbl(cond, "planes"), by = "tailnum") %>% group_by(manufacturer) %>% summarise(td = sum(distance, na.rm = TRUE), .groups = "drop") %>% arrange(desc(td)) %>% collect() %>% print()
## # A tibble: 35 × 2
##    manufacturer                         td
##    <chr>                             <dbl>
##  1 BOEING                        129780208
##  2 AIRBUS                         67644103
##  3 AIRBUS INDUSTRIE               40117602
##  4 EMBRAER                        34604019
##  5 BOMBARDIER INC                 14990924
##  6 MCDONNELL DOUGLAS AIRCRAFT CO   8156412
##  7 MCDONNELL DOUGLAS               3841569
##  8 MCDONNELL DOUGLAS CORPORATION   1141928
##  9 CESSNA                           552090
## 10 CIRRUS DESIGN CORP               378957
## # ℹ 25 more rows

Question 3

a) See below!


SELECT dest, COUNT(dest) AS numofflights
FROM flights
GROUP BY dest
ORDER BY numofflights DESC
LIMIT 10;
Displaying records 1 - 10
dest numofflights
ORD 17283
ATL 17215
LAX 16174
BOS 15508
MCO 14082
CLT 14064
SFO 13331
FLL 12055
MIA 11728
DCA 9705

b) See below!


SELECT T2.* FROM flights AS T2
INNER JOIN 
(
SELECT dest, COUNT(dest) AS numofflights
FROM flights
GROUP BY dest
ORDER BY numofflights DESC
LIMIT 10
) 
AS T1 
ON T2.dest = T1.dest
ORDER BY numofflights DESC;
    
Displaying records 1 - 10
year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time arr_delay carrier flight tailnum origin dest air_time distance hour minute time_hour
2013 1 1 554 558 -4 740 728 12 UA 1696 N39463 EWR ORD 150 719 5 58 1357034400
2013 1 1 558 600 -2 753 745 8 AA 301 N3ALAA LGA ORD 138 733 6 0 1357038000
2013 1 1 608 600 8 807 735 32 MQ 3768 N9EAMQ EWR ORD 139 719 6 0 1357038000
2013 1 1 629 630 -1 824 810 14 AA 303 N3CYAA LGA ORD 140 733 6 30 1357038000
2013 1 1 656 700 -4 854 850 4 AA 305 N4WNAA LGA ORD 143 733 7 0 1357041600
2013 1 1 709 700 9 852 832 20 UA 1092 N26226 LGA ORD 135 733 7 0 1357041600
2013 1 1 715 713 2 911 850 21 UA 544 N841UA EWR ORD 156 719 7 13 1357041600
2013 1 1 739 745 -6 918 930 -12 AA 309 N4WPAA LGA ORD 137 733 7 45 1357041600
2013 1 1 749 710 39 939 850 49 MQ 3737 N508MQ EWR ORD 148 719 7 10 1357041600
2013 1 1 828 830 -2 1027 1012 15 B6 905 N274JB JFK ORD 160 740 8 30 1357045200

c) See below!


SELECT  * FROM  flights
WHERE origin = 'EWR'
LIMIT 4;
4 records
year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time arr_delay carrier flight tailnum origin dest air_time distance hour minute time_hour
2013 1 1 517 515 2 830 819 11 UA 1545 N14228 EWR IAH 227 1400 5 15 1357034400
2013 1 1 554 558 -4 740 728 12 UA 1696 N39463 EWR ORD 150 719 5 58 1357034400
2013 1 1 555 600 -5 913 854 19 B6 507 N516JB EWR FLL 158 1065 6 0 1357038000
2013 1 1 558 600 -2 923 937 -14 UA 1124 N53441 EWR SFO 361 2565 6 0 1357038000

d) See below!


SELECT month, day, dep_time, dep_delay, (dep_delay / 60.0) AS delay, carrier
FROM flights
WHERE  origin = 'EWR'
ORDER BY  delay DESC
LIMIT 4;
4 records
month day dep_time dep_delay delay carrier
1 10 1121 1126 18.76667 MQ
12 5 756 896 14.93333 AA
5 3 1133 878 14.63333 MQ
12 19 734 849 14.15000 DL

e) See below!


SELECT carrier, AVG(dep_delay) AS avgdelay, MIN(month) AS month,   MIN(day) AS day, MIN(dep_time) AS departuretime
FROM  flights
GROUP BY carrier
ORDER BY avgdelay DESC
LIMIT 4;
4 records
carrier avgdelay month day departuretime
F9 20.21554 1 1 40
EV 19.95539 1 1 1
YV 18.99633 1 1 558
FL 18.72607 1 1 2

f) See below!


SELECT carrier, AVG(dep_delay) AS avgdelay
FROM flights
GROUP BY carrier
ORDER BY avgdelay DESC
LIMIT 4;
4 records
carrier avgdelay
F9 20.21554
EV 19.95539
YV 18.99633
FL 18.72607

g) See below!

avg <- tbl(cond, "flights") %>% filter(origin == "EWR") %>% summarise(avgdelay = mean(dep_delay, na.rm = TRUE)) %>% pull(avgdelay)

tbl(cond, "flights") %>% filter(origin == "EWR", dep_delay > avg) %>% select(month, day, dep_time, dep_delay) %>% collect() %>% print()
## # A tibble: 28,942 × 4
##    month   day dep_time dep_delay
##    <int> <int>    <int>     <dbl>
##  1     1     1      632        24
##  2     1     1      732        47
##  3     1     1      749        39
##  4     1     1      906        23
##  5     1     1      930        25
##  6     1     1      953        32
##  7     1     1      957       144
##  8     1     1     1025        34
##  9     1     1     1033        16
## 10     1     1     1101        18
## # ℹ 28,932 more rows

g) Double check, See below!


SELECT month, day, dep_time, dep_delay
FROM flights
WHERE origin = 'EWR' 
AND dep_delay > 
(
SELECT AVG(dep_delay)
FROM flights
WHERE origin = 'EWR'
)
ORDER BY dep_delay DESC;
Displaying records 1 - 10
month day dep_time dep_delay
1 10 1121 1126
12 5 756 896
5 3 1133 878
12 19 734 849
12 17 705 845
11 3 603 798
2 24 1921 786
10 14 2042 702
7 7 2123 653
2 13 2022 592