Data_Management

# Echo each chunk's source code in the knitted document.
knitr::opts_chunk$set(echo = TRUE)
# One-time setup, run manually if the packages are missing. The package names
# must be passed as a single character vector; a second positional argument
# would be interpreted as the library directory, not as another package.
# install.packages(c("dplyr", "tidyverse"))
# library("dplyr") is not needed: attaching tidyverse also attaches dplyr.
library("tidyverse")

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# Preview only the first 10 rows of mpg. Rendering the whole table with
# knitr::kable(mpg, caption = "MPG") is deliberately skipped: the table has
# 234 rows, which is too long to print in an HTML document.
mpg %>%
  head(n = 10)

## # A tibble: 10 × 11
##    manufacturer model      displ  year   cyl trans drv     cty   hwy fl    class
##    <chr>        <chr>      <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
##  1 audi         a4           1.8  1999     4 auto… f        18    29 p     comp…
##  2 audi         a4           1.8  1999     4 manu… f        21    29 p     comp…
##  3 audi         a4           2    2008     4 manu… f        20    31 p     comp…
##  4 audi         a4           2    2008     4 auto… f        21    30 p     comp…
##  5 audi         a4           2.8  1999     6 auto… f        16    26 p     comp…
##  6 audi         a4           2.8  1999     6 manu… f        18    26 p     comp…
##  7 audi         a4           3.1  2008     6 auto… f        18    27 p     comp…
##  8 audi         a4 quattro   1.8  1999     4 manu… 4        18    26 p     comp…
##  9 audi         a4 quattro   1.8  1999     4 auto… 4        16    25 p     comp…
## 10 audi         a4 quattro   2    2008     4 manu… 4        20    28 p     comp…

# This displays the first 10 rows of the data.

# Dimensions of the data: 234 rows, 11 columns.
dim(mpg)

## [1] 234  11

# Names of all variables (columns) in the data.
names(mpg)

##  [1] "manufacturer" "model"        "displ"        "year"         "cyl"         
##  [6] "trans"        "drv"          "cty"          "hwy"          "fl"          
## [11] "class"

# Build the cleaned table:
#   * keep only the manufacturer, class, and mileage columns, renaming the
#     first two to brand and vehicle_type as they are selected;
#   * add avg_mpg, the mean of the city and highway mileage;
#   * remove rows: filter() keeps only rows whose avg_mpg is at least 25, and
#     drop_na() discards any row whose vehicle_type is missing.
mpgClean <- mpg %>%
  select(brand = manufacturer, vehicle_type = class, cty, hwy) %>%
  mutate(avg_mpg = (cty + hwy) / 2) %>%
  filter(avg_mpg >= 25) %>%
  drop_na(vehicle_type)

# Print a compact overview of the result. glimpse() returns its input
# invisibly, so calling it on its own leaves mpgClean unchanged.
glimpse(mpgClean)

## Rows: 43
## Columns: 5
## $ brand        <chr> "audi", "audi", "audi", "chevrolet", "honda", "honda", "h…
## $ vehicle_type <chr> "compact", "compact", "compact", "midsize", "subcompact",…
## $ cty          <int> 21, 20, 21, 22, 28, 24, 25, 23, 24, 26, 25, 24, 21, 21, 2…
## $ hwy          <int> 29, 31, 30, 30, 33, 32, 32, 29, 32, 34, 36, 36, 29, 30, 3…
## $ avg_mpg      <dbl> 25.0, 25.5, 25.5, 26.0, 30.5, 28.0, 28.5, 26.0, 28.0, 30.…

# I think that piping (%>%) directly into head() cuts the table down to
# 9 observations, but I'm not sure, so I'm just not going to use it.
# Is it just me, or do you have to alter the data all in one pipe sequence?
# Do you have to load the data again every time you add a new chunk? I tried
# doing the renaming in a new chunk and it didn't work.

# Average MPG per brand. mpgCleanSort is an unmodified copy of mpgClean (it is
# not re-sorted); the grouped summary is printed rather than stored, and
# contains one row per brand.
mpgCleanSort <- mpgClean
mpgCleanSort %>%
  group_by(brand) %>%
  summarise(`Average MPG by Brand` = mean(avg_mpg))

## # A tibble: 7 × 2
##   brand      `Average MPG by Brand`
##   <chr>                       <dbl>
## 1 audi                         25.3
## 2 chevrolet                    26  
## 3 honda                        28.5
## 4 hyundai                      25.8
## 5 nissan                       26.5
## 6 toyota                       27.6
## 7 volkswagen                   27.8

# Average MPG per vehicle type. mpgCleanSort2 is an unmodified copy of
# mpgClean; the grouped summary is printed rather than stored, and contains
# one row per vehicle type.
mpgCleanSort2 <- mpgClean
mpgCleanSort2 %>%
  group_by(vehicle_type) %>%
  summarise(`Average MPG by Type` = mean(avg_mpg))

## # A tibble: 3 × 2
##   vehicle_type `Average MPG by Type`
##   <chr>                        <dbl>
## 1 compact                       27  
## 2 midsize                       25.9
## 3 subcompact                    29.7

Data_Management

Griffin Carson

2026-02-02