# Set echo = TRUE as the default knitr chunk option, so each chunk's source
# code is shown alongside its output in the knitted document.
knitr::opts_chunk$set(echo = TRUE)
# install.packages(c("dplyr", "tidyverse"))  # run once; several packages must be passed as one c(...) vector
# library("dplyr") is not needed separately: tidyverse already attaches dplyr.
library("tidyverse")
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# knitr::kable(mpg, caption="MPG")
# The line above displays the entire table. I commented it out because the table is huge and I don't want to print a 234-row table in an HTML document.
# Preview the data: print only its first 10 rows.
mpg %>%
  head(n = 10)
## # A tibble: 10 × 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto… f 18 29 p comp…
## 2 audi a4 1.8 1999 4 manu… f 21 29 p comp…
## 3 audi a4 2 2008 4 manu… f 20 31 p comp…
## 4 audi a4 2 2008 4 auto… f 21 30 p comp…
## 5 audi a4 2.8 1999 6 auto… f 16 26 p comp…
## 6 audi a4 2.8 1999 6 manu… f 18 26 p comp…
## 7 audi a4 3.1 2008 6 auto… f 18 27 p comp…
## 8 audi a4 quattro 1.8 1999 4 manu… 4 18 26 p comp…
## 9 audi a4 quattro 1.8 1999 4 auto… 4 16 25 p comp…
## 10 audi a4 quattro 2 2008 4 manu… 4 20 28 p comp…
# This displays the first 10 rows of the data.
# Dimensions of the data as (rows, columns): 234 rows, 11 columns.
c(nrow(mpg), ncol(mpg))
## [1] 234 11
# List the name of every variable (column) in the data.
colnames(mpg)
## [1] "manufacturer" "model" "displ" "year" "cyl"
## [6] "trans" "drv" "cty" "hwy" "fl"
## [11] "class"
# Build the cleaned table used by the summaries:
#   * keep only the manufacturer, class, cty and hwy columns, renaming
#     manufacturer -> brand and class -> vehicle_type while selecting them;
#   * add avg_mpg, the mean of city and highway mileage;
#   * remove rows: filter() keeps only rows with avg_mpg of at least 25, and
#     drop_na() discards rows whose vehicle type is missing.
mpgClean <- mpg %>%
  select(brand = manufacturer, vehicle_type = class, cty, hwy) %>%
  mutate(avg_mpg = (cty + hwy) / 2) %>%
  filter(avg_mpg >= 25) %>%
  drop_na(vehicle_type)

# Print a compact, column-wise overview of the cleaned table.
glimpse(mpgClean)
## Rows: 43
## Columns: 5
## $ brand <chr> "audi", "audi", "audi", "chevrolet", "honda", "honda", "h…
## $ vehicle_type <chr> "compact", "compact", "compact", "midsize", "subcompact",…
## $ cty <int> 21, 20, 21, 22, 28, 24, 25, 23, 24, 26, 25, 24, 21, 21, 2…
## $ hwy <int> 29, 31, 30, 30, 33, 32, 32, 29, 32, 34, 36, 36, 29, 30, 3…
## $ avg_mpg <dbl> 25.0, 25.5, 25.5, 26.0, 30.5, 28.0, 28.5, 26.0, 28.0, 30.…
# Note: piping the assignment straight into head() would store only the rows that head() returns (6 by default) in mpgClean, so head() is not used in that pipeline.
# Is it just me, or do you have to alter the data all in one pipe-forward sequence?
#Do you have to load the data again every time you add a new chunk? I tried doing the "rename" shenanigans in a new chunk and it didn't work
# Here I copy the cleaned table into a new object, then group it by brand and summarize the average MPG of each brand (the summary rows come out ordered by brand).
# Work on a copy of the cleaned table.
mpgCleanSort <- mpgClean

# One row per brand, holding the mean of avg_mpg across that brand's rows.
# The summary is printed only; it is not stored in a variable.
summarize(
  group_by(mpgCleanSort, brand),
  `Average MPG by Brand` = mean(avg_mpg)
)
## # A tibble: 7 × 2
## brand `Average MPG by Brand`
## <chr> <dbl>
## 1 audi 25.3
## 2 chevrolet 26
## 3 honda 28.5
## 4 hyundai 25.8
## 5 nissan 26.5
## 6 toyota 27.6
## 7 volkswagen 27.8
# Here I do the same thing again: make a summary table. This one is grouped by vehicle type instead of brand.
# Work on a second copy of the cleaned table.
mpgCleanSort2 <- mpgClean

# One row per vehicle type, holding the mean of avg_mpg across that type's
# rows. The summary is printed only; it is not stored in a variable.
summarize(
  group_by(mpgCleanSort2, vehicle_type),
  `Average MPG by Type` = mean(avg_mpg)
)
## # A tibble: 3 × 2
## vehicle_type `Average MPG by Type`
## <chr> <dbl>
## 1 compact 27
## 2 midsize 25.9
## 3 subcompact 29.7