library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(datasets)
library(dbplyr)
##
## Attaching package: 'dbplyr'
##
## The following objects are masked from 'package:dplyr':
##
## ident, sql
# Open the dataset in the data viewer (lowercase `view()`, as attached by
# the tidyverse).
view(mpg)

# Show the first ten rows of the make, class and mileage columns.
mpg %>%
  select(manufacturer, class, cty, hwy) %>%
  slice_head(n = 10)
## # A tibble: 10 × 4
## manufacturer class cty hwy
## <chr> <chr> <int> <int>
## 1 audi compact 18 29
## 2 audi compact 21 29
## 3 audi compact 20 31
## 4 audi compact 21 30
## 5 audi compact 16 26
## 6 audi compact 18 26
## 7 audi compact 18 27
## 8 audi compact 18 26
## 9 audi compact 16 25
## 10 audi compact 20 28
library("dplyr")
# Prints the literal character string "mpg" (note the quotes), not the dataset.
print("mpg")
## [1] "mpg"
# Prints the `mpg` tibble itself.
print(mpg)
## # A tibble: 234 × 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto… f 18 29 p comp…
## 2 audi a4 1.8 1999 4 manu… f 21 29 p comp…
## 3 audi a4 2 2008 4 manu… f 20 31 p comp…
## 4 audi a4 2 2008 4 auto… f 21 30 p comp…
## 5 audi a4 2.8 1999 6 auto… f 16 26 p comp…
## 6 audi a4 2.8 1999 6 manu… f 18 26 p comp…
## 7 audi a4 3.1 2008 6 auto… f 18 27 p comp…
## 8 audi a4 quattro 1.8 1999 4 manu… 4 18 26 p comp…
## 9 audi a4 quattro 1.8 1999 4 auto… 4 16 25 p comp…
## 10 audi a4 quattro 2 2008 4 manu… 4 20 28 p comp…
## # ℹ 224 more rows
# Add avg_mpg: the per-row mean of city (cty) and highway (hwy) mileage,
# ignoring missing values, stored as a plain double column.
mpg <- mpg %>%
  mutate(avg_mpg = as.numeric(rowMeans(cbind(cty, hwy), na.rm = TRUE)))

# Display the tibble, which now carries the extra column.
print(mpg)
## # A tibble: 234 × 12
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto… f 18 29 p comp…
## 2 audi a4 1.8 1999 4 manu… f 21 29 p comp…
## 3 audi a4 2 2008 4 manu… f 20 31 p comp…
## 4 audi a4 2 2008 4 auto… f 21 30 p comp…
## 5 audi a4 2.8 1999 6 auto… f 16 26 p comp…
## 6 audi a4 2.8 1999 6 manu… f 18 26 p comp…
## 7 audi a4 3.1 2008 6 auto… f 18 27 p comp…
## 8 audi a4 quattro 1.8 1999 4 manu… 4 18 26 p comp…
## 9 audi a4 quattro 1.8 1999 4 auto… 4 16 25 p comp…
## 10 audi a4 quattro 2 2008 4 manu… 4 20 28 p comp…
## # ℹ 224 more rows
## # ℹ 1 more variable: avg_mpg <dbl>
# Give two columns more descriptive names in a single rename() call:
# manufacturer -> brand, class -> vehicle_type.
mpg <- rename(mpg, brand = manufacturer, vehicle_type = class)
# Keep vehicles averaging at least 25 mpg whose vehicle_type is not missing.
mpg %>%
  filter(avg_mpg >= 25, !is.na(vehicle_type))
## # A tibble: 43 × 12
## brand model displ year cyl trans drv cty hwy fl vehicle_type
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 manu… f 21 29 p compact
## 2 audi a4 2 2008 4 manu… f 20 31 p compact
## 3 audi a4 2 2008 4 auto… f 21 30 p compact
## 4 chevrolet malibu 2.4 2008 4 auto… f 22 30 r midsize
## 5 honda civic 1.6 1999 4 manu… f 28 33 r subcompact
## 6 honda civic 1.6 1999 4 auto… f 24 32 r subcompact
## 7 honda civic 1.6 1999 4 manu… f 25 32 r subcompact
## 8 honda civic 1.6 1999 4 manu… f 23 29 p subcompact
## 9 honda civic 1.6 1999 4 auto… f 24 32 r subcompact
## 10 honda civic 1.8 2008 4 manu… f 26 34 r subcompact
## # ℹ 33 more rows
## # ℹ 1 more variable: avg_mpg <dbl>
I can’t figure out how to get both of these summaries into the same table. Every approach I have tried breaks the code, so I have left them as two separate tables.
# Mean avg_mpg for each brand / vehicle_type pair, computed on rows that
# have no missing value in any column.
# Note: further per-group statistics (e.g. n()) can be passed as extra
# arguments to the same summarise() call to obtain a single table.
mpg %>%
  drop_na() %>%
  group_by(brand, vehicle_type) %>%
  summarise(`average mpg` = mean(avg_mpg))
## `summarise()` has grouped output by 'brand'. You can override using the
## `.groups` argument.
## # A tibble: 32 × 3
## # Groups: brand [15]
## brand vehicle_type `average mpg`
## <chr> <chr> <dbl>
## 1 audi compact 22.4
## 2 audi midsize 20
## 3 chevrolet 2seater 20.1
## 4 chevrolet midsize 23.2
## 5 chevrolet suv 14.9
## 6 dodge minivan 19.1
## 7 dodge pickup 14.1
## 8 dodge suv 13.9
## 9 ford pickup 14.7
## 10 ford subcompact 19.6
## # ℹ 22 more rows
# Number of rows in each brand / vehicle_type group, counted after
# dropping rows that have a missing value in any column.
mpg %>%
  drop_na() %>%
  group_by(brand, vehicle_type) %>%
  summarise(`how many in each group` = n())
## `summarise()` has grouped output by 'brand'. You can override using the
## `.groups` argument.
## # A tibble: 32 × 3
## # Groups: brand [15]
## brand vehicle_type `how many in each group`
## <chr> <chr> <int>
## 1 audi compact 15
## 2 audi midsize 3
## 3 chevrolet 2seater 5
## 4 chevrolet midsize 5
## 5 chevrolet suv 9
## 6 dodge minivan 11
## 7 dodge pickup 19
## 8 dodge suv 7
## 9 ford pickup 7
## 10 ford subcompact 9
## # ℹ 22 more rows