Module 3-1-Principle - Data Visualization with ggplot2 in R

Author

Harrison Gan

Published

February 25, 2026

#install.packages("ggplot2")
#install.packages('ggrepel')
#install.packages('ggthemes')
#install.packages('scales')
#install.packages('plotly')
#install.packages('lattice')
#install.packages('GGally')
#install.packages("dplyr")
#install.packages("tidyverse")
#install.packages('ggtext')
#install.packages("glue")


library(ggplot2) #visualization
library(ggrepel) #labels for data
library(ggthemes) #collections of themes
library(scales) # scale
library(plotly) # interactive chart
library(GGally) # correlation
library(dplyr) # data transformation
library(tidyverse) # mega package containing 8 packages
library(ggtext) # for text visualization
library(glue) # combining multiple component
library(gapminder)
library(tibble)
library(magrittr) # provides the %>% pipe operator (package name is "magrittr")

1. Understand mtcars data

1.1 Using Help

?mtcars

A data frame with 32 observations on 11 (numeric) variables.

  • [, 1] mpg Miles/(US) gallon

  • [, 2] cyl Number of cylinders

  • [, 3] disp Displacement (cu.in.)

  • [, 4] hp Gross horsepower

  • [, 5] drat Rear axle ratio

  • [, 6] wt Weight (1000 lbs)

  • [, 7] qsec 1/4 mile time

  • [, 8] vs Engine (0 = V-shaped, 1 = straight)

  • [, 9] am Transmission (0 = automatic, 1 = manual)

  • [,10] gear Number of forward gears

  • [,11] carb Number of carburetors

1.2 Reading data and converting to a tibble (cars)

head(mtcars)
class(mtcars)
[1] "data.frame"
mtcars
# Build the working tibble with the magrittr pipe %>% (shortcut: Ctrl+Shift+M).
# Row names are promoted to the `model` column *before* as_tibble(), because
# the conversion to a tibble removes row names.
cars <- mtcars %>%
  rownames_to_column(var = "model") %>%
  as_tibble()

# Show the first 20 rows with every column printed.
print(cars, n = 20, width = Inf)
# A tibble: 32 × 12
   model                 mpg   cyl  disp    hp  drat    wt  qsec    vs    am
   <chr>               <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1 Mazda RX4            21       6 160     110  3.9   2.62  16.5     0     1
 2 Mazda RX4 Wag        21       6 160     110  3.9   2.88  17.0     0     1
 3 Datsun 710           22.8     4 108      93  3.85  2.32  18.6     1     1
 4 Hornet 4 Drive       21.4     6 258     110  3.08  3.22  19.4     1     0
 5 Hornet Sportabout    18.7     8 360     175  3.15  3.44  17.0     0     0
 6 Valiant              18.1     6 225     105  2.76  3.46  20.2     1     0
 7 Duster 360           14.3     8 360     245  3.21  3.57  15.8     0     0
 8 Merc 240D            24.4     4 147.     62  3.69  3.19  20       1     0
 9 Merc 230             22.8     4 141.     95  3.92  3.15  22.9     1     0
10 Merc 280             19.2     6 168.    123  3.92  3.44  18.3     1     0
11 Merc 280C            17.8     6 168.    123  3.92  3.44  18.9     1     0
12 Merc 450SE           16.4     8 276.    180  3.07  4.07  17.4     0     0
13 Merc 450SL           17.3     8 276.    180  3.07  3.73  17.6     0     0
14 Merc 450SLC          15.2     8 276.    180  3.07  3.78  18       0     0
15 Cadillac Fleetwood   10.4     8 472     205  2.93  5.25  18.0     0     0
16 Lincoln Continental  10.4     8 460     215  3     5.42  17.8     0     0
17 Chrysler Imperial    14.7     8 440     230  3.23  5.34  17.4     0     0
18 Fiat 128             32.4     4  78.7    66  4.08  2.2   19.5     1     1
19 Honda Civic          30.4     4  75.7    52  4.93  1.62  18.5     1     1
20 Toyota Corolla       33.9     4  71.1    65  4.22  1.84  19.9     1     1
    gear  carb
   <dbl> <dbl>
 1     4     4
 2     4     4
 3     4     1
 4     3     1
 5     3     2
 6     3     1
 7     3     4
 8     4     2
 9     4     2
10     4     4
11     4     4
12     3     3
13     3     3
14     3     3
15     3     4
16     3     4
17     3     4
18     4     1
19     4     2
20     4     1
# ℹ 12 more rows
# Same pipeline written with the native pipe |> instead of %>%.
cars <-
  mtcars |>
    rownames_to_column() |> # do this before as_tibble(): the conversion removes row names
    as_tibble() |>
    rename(model = rowname) |> # keep the `model` column name the rest of the document uses
    print(n = 20, width = Inf)

1.3 Simple Descriptive Statistics

summary(cars)
    model                mpg             cyl             disp      
 Length:32          Min.   :10.40   Min.   :4.000   Min.   : 71.1  
 Class :character   1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8  
 Mode  :character   Median :19.20   Median :6.000   Median :196.3  
                    Mean   :20.09   Mean   :6.188   Mean   :230.7  
                    3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0  
                    Max.   :33.90   Max.   :8.000   Max.   :472.0  
       hp             drat             wt             qsec      
 Min.   : 52.0   Min.   :2.760   Min.   :1.513   Min.   :14.50  
 1st Qu.: 96.5   1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89  
 Median :123.0   Median :3.695   Median :3.325   Median :17.71  
 Mean   :146.7   Mean   :3.597   Mean   :3.217   Mean   :17.85  
 3rd Qu.:180.0   3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90  
 Max.   :335.0   Max.   :4.930   Max.   :5.424   Max.   :22.90  
       vs               am              gear            carb      
 Min.   :0.0000   Min.   :0.0000   Min.   :3.000   Min.   :1.000  
 1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
 Median :0.0000   Median :0.0000   Median :4.000   Median :2.000  
 Mean   :0.4375   Mean   :0.4062   Mean   :3.688   Mean   :2.812  
 3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
 Max.   :1.0000   Max.   :1.0000   Max.   :5.000   Max.   :8.000  
glimpse(cars)
Rows: 32
Columns: 12
$ model <chr> "Mazda RX4", "Mazda RX4 Wag", "Datsun 710", "Hornet 4 Drive", "H…
$ mpg   <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8…
$ cyl   <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8…
$ disp  <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 140.8, 1…
$ hp    <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, 180, 18…
$ drat  <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92…
$ wt    <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, 3…
$ qsec  <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 22.90, 1…
$ vs    <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0…
$ am    <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0…
$ gear  <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3…
$ carb  <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2…
skimr::skim(cars)
Data summary
Name cars
Number of rows 32
Number of columns 12
_______________________
Column type frequency:
character 1
numeric 11
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
model 0 1 7 19 0 32 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
mpg 0 1 20.09 6.03 10.40 15.43 19.20 22.80 33.90 ▃▇▅▁▂
cyl 0 1 6.19 1.79 4.00 4.00 6.00 8.00 8.00 ▆▁▃▁▇
disp 0 1 230.72 123.94 71.10 120.83 196.30 326.00 472.00 ▇▃▃▃▂
hp 0 1 146.69 68.56 52.00 96.50 123.00 180.00 335.00 ▇▇▆▃▁
drat 0 1 3.60 0.53 2.76 3.08 3.70 3.92 4.93 ▇▃▇▅▁
wt 0 1 3.22 0.98 1.51 2.58 3.33 3.61 5.42 ▃▃▇▁▂
qsec 0 1 17.85 1.79 14.50 16.89 17.71 18.90 22.90 ▃▇▇▂▁
vs 0 1 0.44 0.50 0.00 0.00 0.00 1.00 1.00 ▇▁▁▁▆
am 0 1 0.41 0.50 0.00 0.00 0.00 1.00 1.00 ▇▁▁▁▆
gear 0 1 3.69 0.74 3.00 3.00 4.00 4.00 5.00 ▇▁▆▁▂
carb 0 1 2.81 1.62 1.00 2.00 2.00 4.00 8.00 ▇▂▅▁▁

2. Basic Plotting Methods in Base R

# Using a built-in plotting function: histogram of engine displacement (disp).
# A single number for `breaks` is a suggested bin count, not an exact one.
hist(cars$disp, breaks = 10)

3. Lattice package

library(lattice)
# Scatter plot of mpg against wt using lattice's formula interface (y ~ x).
xyplot(mpg ~ wt, data = cars)

4. ggplot2

  • We will use ggplot2 - the best tool in the market for data visualization from now on

4.1. Elaborate Example

Data Wrangling

# Rebuild `cars` from mtcars: move the row names into a column, convert the
# data frame to a tibble, then call that column `model`.
cars <- mtcars |>
  rownames_to_column() |>
  as_tibble() |>
  rename(model = rowname)

Plotting

# Number of cars in each cylinder class.
cars |>
  count(cyl)

# Lookup-table labeller that maps each `cyl` value to a readable title
# (for use as a facet `labeller`). The variable name's spelling is kept
# unchanged so any existing references to it still resolve.
easy_tabels_n <- as_labeller(
  c(
    "4" = "4 Cylinder Cars",
    "6" = "6 Cylinder Cars",
    "8" = "8 Cylinder Cars"
  )
)
                              
# Displacement against fuel economy, one stacked panel per cylinder count.
# Points are forced to black; the linear fit lines (no confidence band) keep
# the cylinder colour mapped in aes().
ggplot(
  data = cars,
  mapping = aes(x = mpg, y = disp, color = factor(cyl))
) +
  geom_point(size = 3, color = "black") +
  geom_smooth(method = lm, se = FALSE) +
  facet_wrap(~ cyl, ncol = 1) +
  theme_bw()