Loading Libraries

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(datasets)
library(dbplyr)
## 
## Attaching package: 'dbplyr'
## 
## The following objects are masked from 'package:dplyr':
## 
##     ident, sql
# Open the mpg data in the interactive data viewer.
tibble::view(mpg)

Selecting Key Columns

# Preview the first ten rows, restricted to the four key columns.
head(select(mpg, manufacturer, class, cty, hwy), n = 10)
## # A tibble: 10 × 4
##    manufacturer class     cty   hwy
##    <chr>        <chr>   <int> <int>
##  1 audi         compact    18    29
##  2 audi         compact    21    29
##  3 audi         compact    20    31
##  4 audi         compact    21    30
##  5 audi         compact    16    26
##  6 audi         compact    18    26
##  7 audi         compact    18    27
##  8 audi         compact    18    26
##  9 audi         compact    16    25
## 10 audi         compact    20    28

Creating New Column

library(dplyr)

# Show the dataset as it looks before the new column is added.
print(mpg)
## # A tibble: 234 × 11
##    manufacturer model      displ  year   cyl trans drv     cty   hwy fl    class
##    <chr>        <chr>      <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
##  1 audi         a4           1.8  1999     4 auto… f        18    29 p     comp…
##  2 audi         a4           1.8  1999     4 manu… f        21    29 p     comp…
##  3 audi         a4           2    2008     4 manu… f        20    31 p     comp…
##  4 audi         a4           2    2008     4 auto… f        21    30 p     comp…
##  5 audi         a4           2.8  1999     6 auto… f        16    26 p     comp…
##  6 audi         a4           2.8  1999     6 manu… f        18    26 p     comp…
##  7 audi         a4           3.1  2008     6 auto… f        18    27 p     comp…
##  8 audi         a4 quattro   1.8  1999     4 manu… 4        18    26 p     comp…
##  9 audi         a4 quattro   1.8  1999     4 auto… 4        16    25 p     comp…
## 10 audi         a4 quattro   2    2008     4 manu… 4        20    28 p     comp…
## # ℹ 224 more rows
# Add avg_mpg: the row-wise mean of city (cty) and highway (hwy) mileage.
# pick() takes the columns from the data being mutated rather than from the
# global `mpg`, and rowMeans() already returns a double vector, so no
# as.numeric() conversion is needed afterwards.
mpg <- mpg %>%
  mutate(avg_mpg = rowMeans(pick(cty, hwy), na.rm = TRUE))

print(mpg)
## # A tibble: 234 × 12
##    manufacturer model      displ  year   cyl trans drv     cty   hwy fl    class
##    <chr>        <chr>      <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
##  1 audi         a4           1.8  1999     4 auto… f        18    29 p     comp…
##  2 audi         a4           1.8  1999     4 manu… f        21    29 p     comp…
##  3 audi         a4           2    2008     4 manu… f        20    31 p     comp…
##  4 audi         a4           2    2008     4 auto… f        21    30 p     comp…
##  5 audi         a4           2.8  1999     6 auto… f        16    26 p     comp…
##  6 audi         a4           2.8  1999     6 manu… f        18    26 p     comp…
##  7 audi         a4           3.1  2008     6 auto… f        18    27 p     comp…
##  8 audi         a4 quattro   1.8  1999     4 manu… 4        18    26 p     comp…
##  9 audi         a4 quattro   1.8  1999     4 auto… 4        16    25 p     comp…
## 10 audi         a4 quattro   2    2008     4 manu… 4        20    28 p     comp…
## # ℹ 224 more rows
## # ℹ 1 more variable: avg_mpg <dbl>

Renaming Columns

 # Give the two grouping columns more descriptive names in a single call.
 mpg <- rename(mpg, brand = manufacturer, vehicle_type = class)

Filtering Dataset

# Keep vehicles averaging at least 25 mpg whose vehicle type is known.
filter(mpg, avg_mpg >= 25, !is.na(vehicle_type))
## # A tibble: 43 × 12
##    brand     model  displ  year   cyl trans drv     cty   hwy fl    vehicle_type
##    <chr>     <chr>  <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>       
##  1 audi      a4       1.8  1999     4 manu… f        21    29 p     compact     
##  2 audi      a4       2    2008     4 manu… f        20    31 p     compact     
##  3 audi      a4       2    2008     4 auto… f        21    30 p     compact     
##  4 chevrolet malibu   2.4  2008     4 auto… f        22    30 r     midsize     
##  5 honda     civic    1.6  1999     4 manu… f        28    33 r     subcompact  
##  6 honda     civic    1.6  1999     4 auto… f        24    32 r     subcompact  
##  7 honda     civic    1.6  1999     4 manu… f        25    32 r     subcompact  
##  8 honda     civic    1.6  1999     4 manu… f        23    29 p     subcompact  
##  9 honda     civic    1.6  1999     4 auto… f        24    32 r     subcompact  
## 10 honda     civic    1.8  2008     4 manu… f        26    34 r     subcompact  
## # ℹ 33 more rows
## # ℹ 1 more variable: avg_mpg <dbl>

Creating Summary Table(s)

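I could not work out how to get the average mpg and the group count into the same summary table. Every approach I tried broke, so my first version used two separate tables. Note that a single `summarise()` call accepts several named summaries (for example `summarise(avg = mean(x), n = n())`), which puts both statistics in one table.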

# Summarise each brand / vehicle-type group in a single table. summarise()
# accepts several name = value pairs, so the group mean and the group size
# are computed together. `.groups = "drop"` returns an ungrouped tibble and
# silences the grouping message. Only the columns used here are checked for
# NA, so a missing value in an unrelated column cannot remove a row.
mpg %>%
  drop_na(brand, vehicle_type, avg_mpg) %>%
  group_by(brand, vehicle_type) %>%
  summarise(
    "average mpg" = mean(avg_mpg),
    "how many in each group" = n(),
    .groups = "drop"
  )
## # A tibble: 32 × 4
##    brand     vehicle_type `average mpg` `how many in each group`
##    <chr>     <chr>                <dbl>                    <int>
##  1 audi      compact               22.4                       15
##  2 audi      midsize               20                          3
##  3 chevrolet 2seater               20.1                        5
##  4 chevrolet midsize               23.2                        5
##  5 chevrolet suv                   14.9                        9
##  6 dodge     minivan               19.1                       11
##  7 dodge     pickup                14.1                       19
##  8 dodge     suv                   13.9                        7
##  9 ford      pickup                14.7                        7
## 10 ford      subcompact            19.6                        9
## # ℹ 22 more rows