Data Homework

Install Packages and Load Data

install.packages("tidyverse")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
library("tidyverse")
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the built-in mpg dataset and preview its first 10 rows
mpg <- read_builtin("mpg")
head(mpg, n = 10)
## # A tibble: 10 × 11
##    manufacturer model      displ  year   cyl trans drv     cty   hwy fl    class
##    <chr>        <chr>      <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
##  1 audi         a4           1.8  1999     4 auto… f        18    29 p     comp…
##  2 audi         a4           1.8  1999     4 manu… f        21    29 p     comp…
##  3 audi         a4           2    2008     4 manu… f        20    31 p     comp…
##  4 audi         a4           2    2008     4 auto… f        21    30 p     comp…
##  5 audi         a4           2.8  1999     6 auto… f        16    26 p     comp…
##  6 audi         a4           2.8  1999     6 manu… f        18    26 p     comp…
##  7 audi         a4           3.1  2008     6 auto… f        18    27 p     comp…
##  8 audi         a4 quattro   1.8  1999     4 manu… 4        18    26 p     comp…
##  9 audi         a4 quattro   1.8  1999     4 auto… 4        16    25 p     comp…
## 10 audi         a4 quattro   2    2008     4 manu… 4        20    28 p     comp…

Names of All Variables

# List the name of every variable (column) in mpg
colnames(mpg)
##  [1] "manufacturer" "model"        "displ"        "year"         "cyl"         
##  [6] "trans"        "drv"          "cty"          "hwy"          "fl"          
## [11] "class"

Organizing Data

# Build the filtered table: keep four columns, add the average of city and
# highway mileage, and retain only vehicles averaging at least 25 mpg.
mpg_filterd <- mpg %>%
  # Keep only the needed columns, renaming two of them inside select():
  # the new name goes on the left of `=`, the old name on the right.
  select(brand = manufacturer, vehicle_type = class, cty, hwy) %>%
  # cty and hwy are integers; dividing by 2 already yields a double.
  mutate(avg_mpg = (cty + hwy) / 2) %>%
  # Keep rows with avg_mpg of 25 or more (inclusive) and a non-missing class.
  filter(avg_mpg >= 25, !is.na(vehicle_type)) %>%
  # Drop the 2nd and 5th of the remaining rows (selected by position).
  slice(-c(2, 5))

head(mpg_filterd)
## # A tibble: 6 × 5
##   brand     vehicle_type   cty   hwy avg_mpg
##   <chr>     <chr>        <int> <int>   <dbl>
## 1 audi      compact         21    29    25  
## 2 audi      compact         21    30    25.5
## 3 chevrolet midsize         22    30    26  
## 4 honda     subcompact      24    32    28  
## 5 honda     subcompact      25    32    28.5
## 6 honda     subcompact      23    29    26

Creating a Summary Table

# Summarise by brand and vehicle type: the mean of avg_mpg and the number of
# vehicles in each combination. `.groups = "drop"` returns an ungrouped tibble.
summary_table <- mpg_filterd %>%
  group_by(brand, vehicle_type) %>%
  summarise(
    mean_avg_mpg = mean(avg_mpg, na.rm = TRUE),
    vehicle_count = n(),
    .groups = "drop"
  )

head(summary_table, n = 11)
## # A tibble: 11 × 4
##    brand      vehicle_type mean_avg_mpg vehicle_count
##    <chr>      <chr>               <dbl>         <int>
##  1 audi       compact              25.2             2
##  2 chevrolet  midsize              26               1
##  3 honda      subcompact           28.2             8
##  4 hyundai    midsize              25.8             2
##  5 nissan     compact              25               1
##  6 nissan     midsize              27.2             2
##  7 toyota     compact              28.3             8
##  8 toyota     midsize              25.7             3
##  9 volkswagen compact              26.6             9
## 10 volkswagen midsize              25               2
## 11 volkswagen subcompact           33.2             3