Introduction

This analysis demonstrates how to load, inspect, clean, transform, and summarize data in R using the tidyverse collection of packages.
The mpg dataset from the ggplot2 package is used to explore fuel efficiency across different vehicle brands and types.


Load Required Package

# Load tidyverse, which includes ggplot2 and dplyr
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the mpg fuel-economy dataset that ships with ggplot2
data("mpg", package = "ggplot2")

# Show the first 10 rows as a quick sanity check
head(mpg, n = 10)
## # A tibble: 10 × 11
##    manufacturer model      displ  year   cyl trans drv     cty   hwy fl    class
##    <chr>        <chr>      <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
##  1 audi         a4           1.8  1999     4 auto… f        18    29 p     comp…
##  2 audi         a4           1.8  1999     4 manu… f        21    29 p     comp…
##  3 audi         a4           2    2008     4 manu… f        20    31 p     comp…
##  4 audi         a4           2    2008     4 auto… f        21    30 p     comp…
##  5 audi         a4           2.8  1999     6 auto… f        16    26 p     comp…
##  6 audi         a4           2.8  1999     6 manu… f        18    26 p     comp…
##  7 audi         a4           3.1  2008     6 auto… f        18    27 p     comp…
##  8 audi         a4 quattro   1.8  1999     4 manu… 4        18    26 p     comp…
##  9 audi         a4 quattro   1.8  1999     4 auto… 4        16    25 p     comp…
## 10 audi         a4 quattro   2    2008     4 manu… 4        20    28 p     comp…
# View the structure of the dataset: its dimensions plus each column's
# name, type, and first few values
str(mpg)
## tibble [234 × 11] (S3: tbl_df/tbl/data.frame)
##  $ manufacturer: chr [1:234] "audi" "audi" "audi" "audi" ...
##  $ model       : chr [1:234] "a4" "a4" "a4" "a4" ...
##  $ displ       : num [1:234] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : int [1:234] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
##  $ cyl         : int [1:234] 4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr [1:234] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr [1:234] "f" "f" "f" "f" ...
##  $ cty         : int [1:234] 18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : int [1:234] 29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr [1:234] "p" "p" "p" "p" ...
##  $ class       : chr [1:234] "compact" "compact" "compact" "compact" ...
# List every column name in the dataset
colnames(mpg)
##  [1] "manufacturer" "model"        "displ"        "year"         "cyl"         
##  [6] "trans"        "drv"          "cty"          "hwy"          "fl"          
## [11] "class"
# Build a per-brand, per-vehicle-type fuel-efficiency summary from mpg.
# Only vehicles whose combined (city/highway) MPG is at least 25 are kept;
# each group then reports its mean combined MPG and its vehicle count.
summary_table <- mpg %>%
  # Keep the four columns of interest, renaming two of them for clarity
  select(brand = manufacturer, vehicle_type = class, cty, hwy) %>%
  # Combined MPG: the average of city and highway MPG
  mutate(avg_mpg = (cty + hwy) / 2) %>%
  # Drop rows with a missing vehicle type or a combined MPG below 25
  filter(!is.na(vehicle_type), avg_mpg >= 25) %>%
  # Remove rows 2 and 5 of the filtered data (positions are post-filter)
  slice(c(-2, -5)) %>%
  # Summarise each brand / vehicle-type combination, returning an
  # ungrouped tibble
  group_by(brand, vehicle_type) %>%
  summarise(
    avg_mpg_mean = mean(avg_mpg),
    vehicle_count = n(),
    .groups = "drop"
  )

# Display the final summary table: one row per brand / vehicle-type group,
# with its mean combined MPG and its vehicle count
summary_table
## # A tibble: 11 × 4
##    brand      vehicle_type avg_mpg_mean vehicle_count
##    <chr>      <chr>               <dbl>         <int>
##  1 audi       compact              25.2             2
##  2 chevrolet  midsize              26               1
##  3 honda      subcompact           28.2             8
##  4 hyundai    midsize              25.8             2
##  5 nissan     compact              25               1
##  6 nissan     midsize              27.2             2
##  7 toyota     compact              28.3             8
##  8 toyota     midsize              25.7             3
##  9 volkswagen compact              26.6             9
## 10 volkswagen midsize              25               2
## 11 volkswagen subcompact           33.2             3