Part 1

packages

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
transactions <- transactions_sample
products
## # A tibble: 92,331 × 7
##    product_id manufacturer_id department    brand  product_category product_type
##    <chr>      <chr>           <chr>         <fct>  <chr>            <chr>       
##  1 25671      2               GROCERY       Natio… FRZN ICE         ICE - CRUSH…
##  2 26081      2               MISCELLANEOUS Natio… <NA>             <NA>        
##  3 26093      69              PASTRY        Priva… BREAD            BREAD:ITALI…
##  4 26190      69              GROCERY       Priva… FRUIT - SHELF S… APPLE SAUCE 
##  5 26355      69              GROCERY       Priva… COOKIES/CONES    SPECIALTY C…
##  6 26426      69              GROCERY       Priva… SPICES & EXTRAC… SPICES & SE…
##  7 26540      69              GROCERY       Priva… COOKIES/CONES    TRAY PACK/C…
##  8 26601      69              DRUG GM       Priva… VITAMINS         VITAMIN - M…
##  9 26636      69              PASTRY        Priva… BREAKFAST SWEETS SW GDS: SW …
## 10 26691      16              GROCERY       Priva… PNT BTR/JELLY/J… HONEY       
## # ℹ 92,321 more rows
## # ℹ 1 more variable: package_size <chr>

Excercise 1

transactions <- transactions %>%
  mutate(
    regular_price = (sales_value + retail_disc + coupon_match_disc) / quantity,
    loyalty_price = (sales_value + coupon_match_disc) / quantity,
    coupon_price = (sales_value - coupon_disc) / quantity
  ) %>%
  select(regular_price, loyalty_price, coupon_price, product_id, everything())

# Q1
transactions %>%
  slice_max(order_by = loyalty_price, n = 5)
## # A tibble: 5 × 14
##   regular_price loyalty_price coupon_price product_id household_id store_id
##           <dbl>         <dbl>        <dbl> <chr>      <chr>        <chr>   
## 1         Inf           Inf          Inf   13945244   2491         389     
## 2         100           100          100   12484608   1246         334     
## 3         100.          100.         100.  13040176   2318         381     
## 4          88.9          88.9         88.9 15630122   1172         396     
## 5          85.0          66.1         66.1 916561     2312         442     
## # ℹ 8 more variables: basket_id <chr>, quantity <dbl>, sales_value <dbl>,
## #   retail_disc <dbl>, coupon_disc <dbl>, coupon_match_disc <dbl>, week <int>,
## #   transaction_timestamp <dttm>
# Q2
transactions %>%
  filter(quantity > 0) %>%
  slice_max(order_by = loyalty_price, n = 5)
## # A tibble: 5 × 14
##   regular_price loyalty_price coupon_price product_id household_id store_id
##           <dbl>         <dbl>        <dbl> <chr>      <chr>        <chr>   
## 1         100           100          100   12484608   1246         334     
## 2         100.          100.         100.  13040176   2318         381     
## 3          88.9          88.9         88.9 15630122   1172         396     
## 4          85.0          66.1         66.1 916561     2312         442     
## 5          63.8          63.8         63.8 1076056    57           298     
## # ℹ 8 more variables: basket_id <chr>, quantity <dbl>, sales_value <dbl>,
## #   retail_disc <dbl>, coupon_disc <dbl>, coupon_match_disc <dbl>, week <int>,
## #   transaction_timestamp <dttm>
#Q3
products %>%
  filter(product_id == '12484608')
## # A tibble: 1 × 7
##   product_id manufacturer_id department    brand   product_category product_type
##   <chr>      <chr>           <chr>         <fct>   <chr>            <chr>       
## 1 12484608   903             MISCELLANEOUS Private COUPON/MISC ITE… ELECTRONIC …
## # ℹ 1 more variable: package_size <chr>

Excercise 2

# how many products had a regular price of $1 or less
transactions %>%
  filter(regular_price <= 1) %>%
  select(product_id) %>%
  n_distinct()
## [1] 2748
# how many products had a loyalty price of $1 or less
transactions %>%
  filter(loyalty_price <= 1) %>%
  select(product_id) %>%
  n_distinct()
## [1] 4648
# how many products had a coupon price of $1 or less
transactions %>%
  filter(coupon_price <= 1) %>%
  select(product_id) %>%
  n_distinct()
## [1] 4844

Excercise 3

transactions %>%
  group_by(basket_id) %>%
  summarize(total_sales = sum(sales_value, na.rm = TRUE)) %>%
  summarize(
    prop_10 = mean(total_sales > 10),
    prop_20 = mean(total_sales > 20)
  )
## # A tibble: 1 × 2
##   prop_10 prop_20
##     <dbl>   <dbl>
## 1   0.107  0.0258

Excercise 4

# Which stores had the largest total `sales_value`
transactions %>%
  group_by(sales_value) %>%
  summarize(total_sales_value = sum(sales_value, na.rm = TRUE)) %>%
  arrange(desc(total_sales_value))
## # A tibble: 1,850 × 2
##    sales_value total_sales_value
##          <dbl>             <dbl>
##  1        2                8838 
##  2        2.99             8369.
##  3        2.5              7430 
##  4        3.99             6576.
##  5        1                5066 
##  6        3                4578 
##  7        4.99             4456.
##  8        5                4330 
##  9        1.99             4310.
## 10        5.99             4067.
## # ℹ 1,840 more rows
# Which stores had the largest average loyalty discount
transactions %>%
  mutate(pct_loyalty_disc = 1 - (loyalty_price / regular_price)) %>%
  group_by(store_id) %>%
  summarize(avg_pct_loyalty_disc = mean(loyalty_price, na.rm = TRUE)) %>%
  arrange(desc(avg_pct_loyalty_disc))
## # A tibble: 293 × 2
##    store_id avg_pct_loyalty_disc
##    <chr>                   <dbl>
##  1 389                    Inf   
##  2 602                     24.0 
##  3 3113                    20   
##  4 108                     14.0 
##  5 3163                    11.3 
##  6 2867                    10.5 
##  7 3327                     8.55
##  8 3253                     8.00
##  9 197                      6.35
## 10 1829                     6.16
## # ℹ 283 more rows

Part 2

Excercise 1

library(readxl)
excel_sheets(path = "data/mbta.xlsx")
## [1] "Sheet1"
mbta <- read_excel(path = "data/mbta.xlsx", skip = 1, na = "NA")
## New names:
## • `` -> `...1`

Excercise 2

# Structure of mbta
mbta
## # A tibble: 11 × 60
##     ...1 mode        `2007-01` `2007-02` `2007-03` `2007-04` `2007-05` `2007-06`
##    <dbl> <chr>           <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
##  1     1 All Modes …     NA        NA     1188.       NA        NA      1246.   
##  2     2 Boat             4         3.6     40         4.3       4.9       5.8  
##  3     3 Bus            336.      339.     340.      352.      354.      351.   
##  4     4 Commuter R…    142.      138.     138.      140.      139       143    
##  5     5 Heavy Rail     435.      448.     459.      472.      475.      477.   
##  6     6 Light Rail     227.      240.     241.      256.      248.      246.   
##  7     7 Pct Chg / …      0.02     -0.04     0.114    -0.002     0.049     0.096
##  8     8 Private Bus      4.77      4.42     4.57      4.54      4.77      4.72 
##  9     9 RIDE             4.9       5        5.5       5.4       5.4       5.6  
## 10    10 Trackless …     12.8      12.9     13.1      13.4      13.5      13.3  
## 11    11 TOTAL         1167.     1192.    1205.     1247.     1245.     1246.   
## # ℹ 52 more variables: `2007-07` <dbl>, `2007-08` <dbl>, `2007-09` <dbl>,
## #   `2007-10` <dbl>, `2007-11` <dbl>, `2007-12` <dbl>, `2008-01` <dbl>,
## #   `2008-02` <dbl>, `2008-03` <dbl>, `2008-04` <dbl>, `2008-05` <dbl>,
## #   `2008-06` <dbl>, `2008-07` <dbl>, `2008-08` <dbl>, `2008-09` <dbl>,
## #   `2008-10` <dbl>, `2008-11` <dbl>, `2008-12` <dbl>, `2009-01` <dbl>,
## #   `2009-02` <dbl>, `2009-03` <dbl>, `2009-04` <dbl>, `2009-05` <dbl>,
## #   `2009-06` <dbl>, `2009-07` <dbl>, `2009-08` <dbl>, `2009-09` <dbl>, …
# View first 6 rows
head(mbta, n=6)
## # A tibble: 6 × 60
##    ...1 mode         `2007-01` `2007-02` `2007-03` `2007-04` `2007-05` `2007-06`
##   <dbl> <chr>            <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
## 1     1 All Modes b…       NA       NA       1188.      NA        NA      1246. 
## 2     2 Boat                4        3.6       40        4.3       4.9       5.8
## 3     3 Bus               336.     339.       340.     352.      354.      351. 
## 4     4 Commuter Ra…      142.     138.       138.     140.      139       143  
## 5     5 Heavy Rail        435.     448.       459.     472.      475.      477. 
## 6     6 Light Rail        227.     240.       241.     256.      248.      246. 
## # ℹ 52 more variables: `2007-07` <dbl>, `2007-08` <dbl>, `2007-09` <dbl>,
## #   `2007-10` <dbl>, `2007-11` <dbl>, `2007-12` <dbl>, `2008-01` <dbl>,
## #   `2008-02` <dbl>, `2008-03` <dbl>, `2008-04` <dbl>, `2008-05` <dbl>,
## #   `2008-06` <dbl>, `2008-07` <dbl>, `2008-08` <dbl>, `2008-09` <dbl>,
## #   `2008-10` <dbl>, `2008-11` <dbl>, `2008-12` <dbl>, `2009-01` <dbl>,
## #   `2009-02` <dbl>, `2009-03` <dbl>, `2009-04` <dbl>, `2009-05` <dbl>,
## #   `2009-06` <dbl>, `2009-07` <dbl>, `2009-08` <dbl>, `2009-09` <dbl>, …
# Summary of mbta
summary(mbta)
##       ...1          mode              2007-01            2007-02        
##  Min.   : 1.0   Length:11          Min.   :   0.020   Min.   :  -0.040  
##  1st Qu.: 3.5   Class :character   1st Qu.:   4.804   1st Qu.:   4.563  
##  Median : 6.0   Mode  :character   Median :  77.478   Median :  75.707  
##  Mean   : 6.0                      Mean   : 233.397   Mean   : 238.324  
##  3rd Qu.: 8.5                      3rd Qu.: 308.672   3rd Qu.: 314.072  
##  Max.   :11.0                      Max.   :1166.974   Max.   :1191.639  
##                                    NA's   :1          NA's   :1         
##     2007-03            2007-04            2007-05            2007-06        
##  Min.   :   0.114   Min.   :  -0.002   Min.   :   0.049   Min.   :   0.096  
##  1st Qu.:   9.278   1st Qu.:   4.756   1st Qu.:   5.025   1st Qu.:   5.700  
##  Median : 137.700   Median :  76.472   Median :  76.240   Median : 143.000  
##  Mean   : 330.293   Mean   : 249.421   Mean   : 248.956   Mean   : 339.846  
##  3rd Qu.: 399.225   3rd Qu.: 328.011   3rd Qu.: 327.841   3rd Qu.: 413.788  
##  Max.   :1204.725   Max.   :1247.105   Max.   :1244.755   Max.   :1246.129  
##                     NA's   :1          NA's   :1                            
##     2007-07            2007-08            2007-09            2007-10        
##  Min.   :  -0.037   Min.   :   0.004   Min.   :  -0.007   Min.   :  -0.064  
##  1st Qu.:   5.570   1st Qu.:   5.624   1st Qu.:   5.539   1st Qu.:   5.310  
##  Median :  77.851   Median :  77.753   Median : 143.051   Median :  80.582  
##  Mean   : 248.787   Mean   : 244.665   Mean   : 352.554   Mean   : 248.884  
##  3rd Qu.: 328.961   3rd Qu.: 325.336   3rd Qu.: 436.082   3rd Qu.: 336.994  
##  Max.   :1243.952   Max.   :1223.323   Max.   :1310.764   Max.   :1244.453  
##  NA's   :1          NA's   :1                             NA's   :1         
##     2007-11            2007-12            2008-01            2008-02        
##  Min.   :  -0.077   Min.   :  -0.060   Min.   :   0.048   Min.   :   0.061  
##  1st Qu.:   4.478   1st Qu.:   4.385   1st Qu.:   4.475   1st Qu.:   4.485  
##  Median :  79.356   Median : 141.585   Median :  78.023   Median :  78.389  
##  Mean   : 248.371   Mean   : 321.588   Mean   : 244.615   Mean   : 252.803  
##  3rd Qu.: 310.744   3rd Qu.: 380.594   3rd Qu.: 315.549   3rd Qu.: 327.005  
##  Max.   :1241.895   Max.   :1216.890   Max.   :1223.050   Max.   :1263.983  
##  NA's   :1                             NA's   :1          NA's   :1         
##     2008-03            2008-04            2008-05            2008-06        
##  Min.   :   0.058   Min.   :   0.060   Min.   :   0.046   Min.   :   0.060  
##  1st Qu.:   5.170   1st Qu.:   4.689   1st Qu.:   4.629   1st Qu.:   5.742  
##  Median : 137.453   Median :  77.555   Median :  78.506   Median : 142.057  
##  Mean   : 345.604   Mean   : 264.435   Mean   : 260.323   Mean   : 359.667  
##  3rd Qu.: 427.601   3rd Qu.: 343.909   3rd Qu.: 337.515   3rd Qu.: 440.656  
##  Max.   :1274.031   Max.   :1322.146   Max.   :1301.591   Max.   :1320.728  
##                     NA's   :1          NA's   :1                            
##     2008-07            2008-08            2008-09            2008-10        
##  Min.   :   0.069   Min.   :   0.023   Min.   :   0.021   Min.   :   0.054  
##  1st Qu.:   6.019   1st Qu.:   5.887   1st Qu.:   5.691   1st Qu.:   5.087  
##  Median :  80.061   Median :  79.141   Median : 141.907   Median :  82.486  
##  Mean   : 266.027   Mean   : 250.383   Mean   : 362.099   Mean   : 262.440  
##  3rd Qu.: 345.730   3rd Qu.: 330.629   3rd Qu.: 453.430   3rd Qu.: 330.996  
##  Max.   :1330.103   Max.   :1251.905   Max.   :1338.015   Max.   :1312.172  
##  NA's   :1          NA's   :1                             NA's   :1         
##     2008-11            2008-12            2009-01            2009-02       
##  Min.   :   0.002   Min.   :  -0.015   Min.   :  -0.034   Min.   :  -0.02  
##  1st Qu.:   4.829   1st Qu.:   4.689   1st Qu.:   4.186   1st Qu.:   4.38  
##  Median :  82.774   Median : 140.810   Median :  76.874   Median :  76.59  
##  Mean   : 248.871   Mean   : 319.882   Mean   : 236.303   Mean   : 247.65  
##  3rd Qu.: 317.084   3rd Qu.: 386.255   3rd Qu.: 307.814   3rd Qu.: 327.49  
##  Max.   :1244.354   Max.   :1232.655   Max.   :1181.534   Max.   :1238.24  
##  NA's   :1                             NA's   :1          NA's   :1        
##     2009-03            2009-04            2009-05            2009-06        
##  Min.   :  -0.050   Min.   :  -0.048   Min.   :  -0.058   Min.   :  -0.079  
##  1st Qu.:   5.003   1st Qu.:   4.720   1st Qu.:   4.763   1st Qu.:   5.845  
##  Median : 142.893   Median :  76.833   Median :  78.358   Median : 142.006  
##  Mean   : 330.142   Mean   : 251.603   Mean   : 245.116   Mean   : 333.194  
##  3rd Qu.: 410.455   3rd Qu.: 329.093   3rd Qu.: 322.303   3rd Qu.: 410.482  
##  Max.   :1210.912   Max.   :1258.037   Max.   :1225.608   Max.   :1233.085  
##                     NA's   :1          NA's   :1                            
##     2009-07            2009-08            2009-09            2009-10        
##  Min.   :  -0.094   Min.   :  -0.044   Min.   :  -0.035   Min.   :  -0.014  
##  1st Qu.:   6.298   1st Qu.:   6.033   1st Qu.:   5.693   1st Qu.:   4.883  
##  Median :  74.558   Median :  75.604   Median : 139.087   Median :  75.178  
##  Mean   : 241.006   Mean   : 239.427   Mean   : 346.687   Mean   : 258.811  
##  3rd Qu.: 312.185   3rd Qu.: 307.442   3rd Qu.: 437.332   3rd Qu.: 347.086  
##  Max.   :1205.079   Max.   :1197.158   Max.   :1291.564   Max.   :1294.064  
##  NA's   :1          NA's   :1                             NA's   :1         
##     2009-11            2009-12            2010-01            2010-02        
##  Min.   :  -0.022   Min.   :  -0.022   Min.   :   0.004   Min.   :  -0.022  
##  1st Qu.:   4.323   1st Qu.:   4.784   1st Qu.:   4.034   1st Qu.:   4.062  
##  Median :  70.997   Median : 126.066   Median :  71.588   Median :  72.238  
##  Mean   : 243.363   Mean   : 312.962   Mean   : 237.255   Mean   : 242.244  
##  3rd Qu.: 328.913   3rd Qu.: 386.659   3rd Qu.: 322.769   3rd Qu.: 324.137  
##  Max.   :1216.824   Max.   :1207.845   Max.   :1186.271   Max.   :1211.228  
##  NA's   :1                             NA's   :1          NA's   :1         
##     2010-03            2010-04            2010-05            2010-06        
##  Min.   :   0.012   Min.   :   0.007   Min.   :   0.013   Min.   :   0.008  
##  1st Qu.:   5.274   1st Qu.:   5.130   1st Qu.:   5.086   1st Qu.:   6.436  
##  Median : 131.252   Median :  72.370   Median :  70.785   Median : 129.144  
##  Mean   : 332.726   Mean   : 253.446   Mean   : 248.231   Mean   : 335.964  
##  3rd Qu.: 428.420   3rd Qu.: 340.770   3rd Qu.: 339.579   3rd Qu.: 426.769  
##  Max.   :1225.556   Max.   :1267.226   Max.   :1241.148   Max.   :1244.409  
##                     NA's   :1          NA's   :1                            
##     2010-07            2010-08            2010-09            2010-10        
##  Min.   :   0.001   Min.   :  -0.015   Min.   :   0.001   Min.   :   0.009  
##  1st Qu.:   6.531   1st Qu.:   6.281   1st Qu.:   5.567   1st Qu.:   5.006  
##  Median :  64.950   Median :  68.388   Median : 132.892   Median :  69.340  
##  Mean   : 241.180   Mean   : 235.947   Mean   : 346.524   Mean   : 261.255  
##  3rd Qu.: 319.462   3rd Qu.: 310.399   3rd Qu.: 451.361   3rd Qu.: 347.832  
##  Max.   :1205.901   Max.   :1179.745   Max.   :1293.117   Max.   :1306.271  
##  NA's   :1          NA's   :1                             NA's   :1         
##     2010-11            2010-12            2011-01            2011-02        
##  Min.   :   0.022   Min.   :  -0.004   Min.   :  -0.028   Min.   :   0.008  
##  1st Qu.:   4.402   1st Qu.:   4.466   1st Qu.:   4.039   1st Qu.:   4.329  
##  Median :  69.166   Median : 121.422   Median :  69.750   Median :  68.579  
##  Mean   : 248.748   Mean   : 312.917   Mean   : 230.680   Mean   : 244.133  
##  3rd Qu.: 331.437   3rd Qu.: 388.385   3rd Qu.: 300.831   3rd Qu.: 314.647  
##  Max.   :1243.730   Max.   :1216.262   Max.   :1153.413   Max.   :1220.663  
##  NA's   :1                             NA's   :1          NA's   :1         
##     2011-03           2011-04            2011-05            2011-06        
##  Min.   :   0.05   Min.   :   0.036   Min.   :   0.050   Min.   :   0.054  
##  1st Qu.:   6.03   1st Qu.:   5.224   1st Qu.:   5.338   1st Qu.:   6.926  
##  Median : 134.37   Median :  73.384   Median :  74.216   Median : 135.581  
##  Mean   : 345.17   Mean   : 262.660   Mean   : 260.582   Mean   : 353.331  
##  3rd Qu.: 448.56   3rd Qu.: 345.904   3rd Qu.: 345.411   3rd Qu.: 452.923  
##  Max.   :1286.66   Max.   :1313.283   Max.   :1302.884   Max.   :1302.414  
##                    NA's   :1          NA's   :1                            
##     2011-07            2011-08            2011-09            2011-10        
##  Min.   :   0.067   Min.   :   0.052   Min.   :   0.043   Min.   :   0.032  
##  1st Qu.:   6.911   1st Qu.:   7.067   1st Qu.:   6.660   1st Qu.:   5.513  
##  Median :  71.735   Median :  70.853   Median : 136.901   Median :  70.508  
##  Mean   : 257.228   Mean   : 248.259   Mean   : 362.555   Mean   : 269.648  
##  3rd Qu.: 328.941   3rd Qu.: 320.386   3rd Qu.: 469.204   3rd Qu.: 358.284  
##  Max.   :1286.107   Max.   :1241.268   Max.   :1348.754   Max.   :1348.222  
##  NA's   :1          NA's   :1                             NA's   :1
# How many missing values in each column
sum(is.na(mbta))
## [1] 39

Excercise 3

mbta <- mbta %>%
  slice(-c(1, 7, 11)) %>% # Remove the first, seventh, and eleventh rows of mbta.
  select(-1) # Remove the first column.

dim(mbta) # Now what is the dimensions of this new data frame?
## [1]  8 59

Excercise 4

mbta <- mbta %>%
pivot_longer(cols = 3, names_to = 'RIDE', values_to = 'thou_riders') # Pivot the rows and columns of the mbta data so that all
# columns are variables of the data. This should result in
# 3 columns - `mode`, `date`, and number of riders in
# thousands (`thou_riders`).
dim(mbta) # Now what is the dimensions of this new data frame?
## [1]  8 60

Excercise 5

mbta <- mbta %>%
separate(col = 'mode', into = c("year", "month"), sep = "-") # Split the month column of mbta at the dash and create a new
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 8 rows [1, 2, 3, 4, 5, 6,
## 7, 8].
# month column with only the month and a year column with only
# the year.
head(mbta) # View the head of this new mbta data set.
## # A tibble: 6 × 61
##   year         month `2007-01` `2007-03` `2007-04` `2007-05` `2007-06` `2007-07`
##   <chr>        <chr>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
## 1 Boat         <NA>       4        40         4.3       4.9       5.8       6.52
## 2 Bus          <NA>     336.      340.      352.      354.      351.      358.  
## 3 Commuter Ra… <NA>     142.      138.      140.      139       143       142.  
## 4 Heavy Rail   <NA>     435.      459.      472.      475.      477.      472.  
## 5 Light Rail   <NA>     227.      241.      256.      248.      246.      243.  
## 6 Private Bus  <NA>       4.77      4.57      4.54      4.77      4.72      3.94
## # ℹ 53 more variables: `2007-08` <dbl>, `2007-09` <dbl>, `2007-10` <dbl>,
## #   `2007-11` <dbl>, `2007-12` <dbl>, `2008-01` <dbl>, `2008-02` <dbl>,
## #   `2008-03` <dbl>, `2008-04` <dbl>, `2008-05` <dbl>, `2008-06` <dbl>,
## #   `2008-07` <dbl>, `2008-08` <dbl>, `2008-09` <dbl>, `2008-10` <dbl>,
## #   `2008-11` <dbl>, `2008-12` <dbl>, `2009-01` <dbl>, `2009-02` <dbl>,
## #   `2009-03` <dbl>, `2009-04` <dbl>, `2009-05` <dbl>, `2009-06` <dbl>,
## #   `2009-07` <dbl>, `2009-08` <dbl>, `2009-09` <dbl>, `2009-10` <dbl>, …

Excercise 7

mbta %>%
  group_by('mode') %>%
  summarize(avg_ridership = mean(thou_riders))
## # A tibble: 1 × 2
##   `"mode"` avg_ridership
##   <chr>            <dbl>
## 1 mode              149.
mbta %>%
  filter(month == '01') %>%
  group_by('mode') %>%
  summarize(avg_ridership = mean(thou_riders))
## # A tibble: 0 × 2
## # ℹ 2 variables: "mode" <chr>, avg_ridership <dbl>