Introduction

The plots below are built from the Avocado and Pokemon datasets, both available on Kaggle. I suggest not trying to interpret the results; the data are used purely as examples.

Loading Packages

setwd('~/Asdos/VDE')
library(ggplot2) # plot library
## Warning: package 'ggplot2' was built under R version 4.3.3
library(tidyverse) # for data manipulation
## Warning: package 'tidyr' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(gridExtra) # multiple plots in 1
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
library(magick) # attach dope image for visual
## Warning: package 'magick' was built under R version 4.3.3
## Linking to ImageMagick 6.9.12.98
## Enabled features: cairo, freetype, fftw, ghostscript, heic, lcms, pango, raw, rsvg, webp
## Disabled features: fontconfig, x11
library(scales) # show the colors
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
library(ggrepel) # for graph repel (labels)
## Warning: package 'ggrepel' was built under R version 4.3.3
library(hexbin) # for hive scatter
## Warning: package 'hexbin' was built under R version 4.3.3
library(naniar) # to check for missing data
## Warning: package 'naniar' was built under R version 4.3.3

Color and Data

# Predefined personal color schemes (change them at your convenience).
# Each palette is a character vector of six hex colours; the plots further
# down pick single colours from them by position (e.g. colorsEarth[3]).
colorsReBu <- c("#922B21", "#EE865D", "#DDCD5E", "#59BEC4", "#048B9F", "#114676")
colorsPuYe <- c("#5C374C", "#985277", "#CE6A85", "#FFCF6A", "#FFB742", "#E9692C")
colorsEarth <- c("#DA4511", "#FFBD00", "#6A953F", "#9A6233", "#D3AE7C", "#307CA1")
colorsRainbow <- c("#FF8EC8", "#FFDF51", "#46DBDF", "#FF8F46", "#42BAB7", "#DB0000")
colorsPastels <- c("#FA6E4F", "#F2CF59", "#FB8E7E", "#C5D7C0", "#8EC9BB", "#F8CA9D")

# Preview each palette as a grid of swatches (no hex labels, no borders).
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
show_col(colorsReBu, labels = FALSE, borders = NA)

show_col(colorsPuYe, labels = FALSE, borders = NA)

show_col(colorsEarth, labels = FALSE, borders = NA)

show_col(colorsRainbow, labels = FALSE, borders = NA)

show_col(colorsPastels, labels = FALSE, borders = NA)

# Importing the data
# Both CSV files are referenced by bare file name, so read.csv() resolves them
# against the current working directory.
avocado <- read.csv("avocado.csv")
pokemon <- read.csv("Pokemon.csv")

Look for Theme

# Build one histogram of `Total` from the pokemon data, styled with a given
# complete ggplot2 theme, so the built-in themes can be compared side by side.
#
# theme_fn: a theme function such as theme_classic (called with no arguments)
# title:    string used as the plot title
# Returns a ggplot object.
theme_demo_hist <- function(theme_fn, title) {
  pokemon %>%
    ggplot(aes(x = Total)) +
    geom_histogram(bins = 7, fill = "orange", alpha = 0.6, color = "grey35") +
    theme_fn() +
    # Axis tick labels are hidden: only the look of the theme matters here.
    theme(axis.text = element_blank()) +
    labs(title = title)
}

p1 <- theme_demo_hist(theme_classic, "theme_classic()")
p2 <- theme_demo_hist(theme_light, "theme_light()")
p3 <- theme_demo_hist(theme_bw, "theme_bw()")
p4 <- theme_demo_hist(theme_dark, "theme_dark()")
p5 <- theme_demo_hist(theme_minimal, "theme_minimal()")
p6 <- theme_demo_hist(theme_test, "theme_test()")

# Lay the six variants out on a 3-column grid.
options(repr.plot.width = 16, repr.plot.height = 8)
grid.arrange(p1, p2, p3, p4, p5, p6, ncol = 3)

Define Your Theme

# Shared theme tweaks that are added on top of a complete theme in the plots
# below: grey text, larger titles/axes, and a framed legend box.
# Line and rectangle widths use `linewidth`; the `size` argument of
# element_line()/element_rect() is deprecated since ggplot2 3.4.0.
my_theme <- theme(
        text = element_text(color = "grey35"),
        plot.title = element_text(size = 25, face = "bold"),
        axis.title = element_text(size = 20),
        axis.text = element_text(size = 15),
        axis.line = element_line(linewidth = 1.2, color = "grey35"),
        legend.box.background = element_rect(color = "grey75", linewidth = 1),
        legend.box.margin = margin(t = 5, r = 5, b = 5, l = 5),
        legend.title = element_text(face = "bold", size = 15),
        legend.text = element_text(size = 13))

Scatterplot Point Shapes

Comparison of Each Scatterplot Point

# Build one Attack-vs-Defense scatterplot of the first 70 pokemon rows drawn
# with a single point shape, titled "Shape<code>".
#
# shape: numeric point-shape code passed to geom_point()
# color: colour used for the points
# Returns a ggplot object.
shape_demo_plot <- function(shape, color) {
  pokemon %>%
    head(70) %>%
    ggplot(aes(x = Defense, y = Attack)) +
    geom_point(shape = shape, size = 5, stroke = 1.3, color = color) +
    theme_test() +
    theme(axis.text = element_blank()) +
    labs(x = "Defense", y = "Attack", title = paste0("Shape", shape))
}

# Variable names follow the shape code they demonstrate.
p1 <- shape_demo_plot(1, colorsEarth[1])
p2 <- shape_demo_plot(2, colorsEarth[2])
p10 <- shape_demo_plot(10, colorsEarth[3])
p4 <- shape_demo_plot(4, colorsEarth[4])
p18 <- shape_demo_plot(18, colorsEarth[5])
p8 <- shape_demo_plot(8, colorsEarth[6])

options(repr.plot.width = 16, repr.plot.height = 8)
grid.arrange(p1, p2, p10, p4, p18, p8, ncol = 3)

Plot the Scatterplot Point with Some Category

options(repr.plot.width = 16, repr.plot.height = 8)

# Attack against Defense for every pokemon; colour encodes Generation
# (treated as a category) and point shape encodes the Legendary flag.
ggplot(pokemon, aes(x = Defense, y = Attack)) +
  geom_point(
    aes(color = as.factor(Generation), shape = Legendary),
    size = 5,
    stroke = 1.5,
    alpha = 0.5
  ) +
  theme_classic() +
  labs(
    x = "Defense",
    y = "Attack",
    title = "Basic Plot",
    color = "Generation",
    shape = "Legendary",
    subtitle = "Scatter Plot",
    caption = "Kaggle:Pokemon Dataset"
  ) +
  # Personal theme goes after the complete theme so its settings win.
  my_theme +
  scale_color_manual(values = colorsEarth) +
  # Tick marks every 25 units on both axes.
  scale_x_continuous(breaks = seq(0, 250, 25)) +
  scale_y_continuous(breaks = seq(0, 200, 25))

Bubble Plot

options(repr.plot.width = 16, repr.plot.height = 8)

# Bubble plot of legendary pokemon: HP on x, Special Attack on y, with bubble
# size following Special Attack and colour following HP.
ggplot(
  data = pokemon %>%
    filter(Legendary == "True") %>%
    arrange(desc(Total)) %>%
    head(150),
  mapping = aes(x = HP, y = Sp..Atk)
) +
  geom_point(aes(color = HP, size = Sp..Atk), alpha = 0.7) +
  scale_size(range = c(1, 20)) +
  theme_bw() +
  labs(
    x = "HP",
    y = "Special Attack",
    title = "Bubble Plot",
    color = "HP",
    size = "Special Attack",
    subtitle = "Scatter Plot",
    caption = "Kaggle:Pokemon Dataset"
  ) +
  my_theme +
  theme(panel.border = element_rect(color = "grey35")) +
  # Diverging colour scale centred on HP = 100.
  scale_color_gradient2(
    low = colorsEarth[5],
    mid = colorsEarth[2],
    high = colorsEarth[1],
    midpoint = 100
  )

Contour + Trend Line

options(repr.plot.width = 16, repr.plot.height = 8)

# Scatterplot of Attack vs. Defense overlaid with 2D density contours and a
# hand-placed arrow indicating the overall trend.
pokemon %>%
  ggplot(aes(x = Defense, y = Attack)) +
  geom_point(alpha = 0.65, size = 5, color = colorsEarth[5]) +
  # Line width is set with `linewidth`; `size` for lines is deprecated since
  # ggplot2 3.4.0.
  geom_density2d(lineend = "round", linejoin = "round", n = 200, linewidth = 1,
                 color = colorsEarth[3], alpha = 0.85) +
  theme_light() +
  labs(x = "Defense", y = "Attack", title = "Eruption Plot",
       subtitle = "Scatter Plot", caption = "Kaggle:Pokemon Dataset") +
  my_theme +
  theme(panel.border = element_rect(color = "grey35")) +
  # A fixed arrow is an annotation, not data: annotate() draws it exactly once,
  # whereas geom_segment(aes(<constants>)) repeats it for every data row.
  annotate("segment", x = 48, y = 52, xend = 175, yend = 175,
           color = colorsEarth[3], linewidth = 1.3,
           arrow = arrow(length = unit(0.5, "cm")))

LineTypes Available in ggplot()

### Transform the 'Date' column from text into Date type

avocado <- avocado %>%
  mutate(Date = as.Date(Date, format = "%Y-%m-%d"))

# Mean price, volume and bag count per Date / region / type combination.
# `.groups = "drop"` returns an ungrouped tibble, so later verbs do not
# silently keep operating per group.
averaged_avocado <- avocado %>%
  group_by(Date, region, type) %>% 
  summarise(AveragePrice = mean(AveragePrice),
            Total.Volume = mean(Total.Volume),
            Total.Bags = mean(Total.Bags),
            .groups = "drop")
averaged_avocado %>% head(4)
## # A tibble: 4 × 6
##   Date       region  type         AveragePrice Total.Volume Total.Bags
##   <date>     <chr>   <chr>               <dbl>        <dbl>      <dbl>
## 1 2015-01-04 Albany  conventional         1.22       40873.      9716.
## 2 2015-01-04 Albany  organic              1.79        1374.      1163.
## 3 2015-01-04 Atlanta conventional         1         435021.     46816.
## 4 2015-01-04 Atlanta organic              1.76        3847.      1408.

Draw a line plot for each linetype

# Conventional avocados sold in San Francisco, 1 Jan - 1 Jun 2015.
# The bounds are explicit Dates rather than strings coerced during comparison.
data <- avocado %>% 
  filter(region == "SanFrancisco" & type == "conventional" &
           Date >= as.Date("2015-01-01") & Date <= as.Date("2015-06-01"))

# Build one Total.Volume-over-Date plot drawn with a given line geom and
# linetype.
#
# df:       data frame with `Date` and `Total.Volume` columns
# geom_fn:  line-drawing geom function (geom_line or geom_step)
# linetype: linetype name, e.g. "dotted"
# title:    string used as the plot title
# Returns a ggplot object.
linetype_demo <- function(df, geom_fn, linetype, title) {
  df %>%
    ggplot(aes(x = Date)) +
    # `linewidth` replaces `size` for lines (deprecated in ggplot2 3.4.0).
    geom_fn(aes(y = Total.Volume), linewidth = 2, color = colorsReBu[6],
            linetype = linetype) +
    theme_test() +
    theme(axis.text = element_blank()) +
    labs(title = title)
}

# First row: continuous lines.
p10 <- linetype_demo(data, geom_line, "dotted", "Dotted Line")
p11 <- linetype_demo(data, geom_line, "dashed", "Dashed Line")
p12 <- linetype_demo(data, geom_line, "longdash", "Longdash Line")

# Second row: the same series drawn as step lines.
p20 <- linetype_demo(data, geom_step, "solid", "Solid Line")
p21 <- linetype_demo(data, geom_step, "dotdash", "Dotdash Line")
p22 <- linetype_demo(data, geom_step, "twodash", "Twodash Line")

options(repr.plot.width = 16, repr.plot.height = 8)
grid.arrange(p10, p11, p12, p20, p21, p22, ncol = 3)

Add smoothing line with geom_smooth()

options(repr.plot.width = 16, repr.plot.height = 8)

# Total volume over time for conventional avocados in San Francisco, with a
# loess trend line drawn on top of the raw series.
avocado %>% 
  filter(region == "SanFrancisco" & type == "conventional") %>%
  ggplot(aes(x = Date)) +
  # `linewidth` replaces `size` for lines (deprecated in ggplot2 3.4.0).
  geom_line(aes(y = Total.Volume), linewidth = 1.5, color = colorsReBu[5]) +
  labs(x = "Date", y = "Total Volume", title = "Basic Line Plot", subtitle = "Line Plot", caption = "Kaggle:Avocado Dataset") +
  theme_light() +
  my_theme +
  theme(panel.border = element_rect(color = "grey35")) +
  # NOTE(review): the axis is a volume but is formatted as dollars; confirm
  # whether scales::comma was intended.
  scale_y_continuous(breaks = seq(0, 2000000, 200000), labels = scales::dollar) +
  # Method and formula are spelled out (they match what geom_smooth() picked
  # by default here) so the plot does not depend on, or announce, defaults.
  geom_smooth(aes(y = Total.Volume), method = "loess", formula = y ~ x,
              color = colorsReBu[2], se = FALSE, span = 0.4)

Area Plot

options(repr.plot.width = 16, repr.plot.height = 8)

# Area chart of total volume over time for conventional avocados in
# San Francisco and Los Angeles, one filled area per region.
ggplot(
  data = avocado %>%
    filter(region %in% c("SanFrancisco", "LosAngeles") & type == "conventional"),
  mapping = aes(x = Date)
) +
  geom_area(aes(y = Total.Volume, fill = region), alpha = 0.9) +
  labs(
    x = "Date",
    y = "Total Volume",
    title = "Area Plot",
    subtitle = "Line Plot",
    caption = "Kaggle:Avocado Dataset"
  ) +
  theme_light() +
  my_theme +
  theme(panel.border = element_rect(color = "grey35")) +
  scale_y_continuous(
    breaks = seq(0, 7000000, 1000000),
    labels = scales::dollar
  ) +
  # One fill colour per region.
  scale_fill_manual(values = c(colorsReBu[1], colorsReBu[4]))

Lineplot + Rectangle Plot

options(repr.plot.width = 16, repr.plot.height = 8)

# Average price per date for conventional avocados in Los Angeles.
data <- avocado %>% 
  filter(region == "LosAngeles" & type == "conventional") %>% 
  group_by(Date) %>% 
  summarise(AveragePrice = mean(AveragePrice))

# Price line with two highlighted date ranges behind it.
data %>% 
  ggplot(aes(x = Date, y = AveragePrice)) +
  # The highlight bands are fixed annotations: annotate() draws each rectangle
  # once, whereas geom_rect(aes(<constants>)) repeats it for every data row.
  # -Inf/Inf make the bands span the full height of the panel.
  annotate("rect", xmin = as.Date("2017-08-01"), xmax = as.Date("2017-11-01"),
           ymin = -Inf, ymax = Inf, fill = colorsReBu[2]) +
  annotate("rect", xmin = as.Date("2016-09-01"), xmax = as.Date("2016-12-01"),
           ymin = -Inf, ymax = Inf, fill = colorsReBu[2]) +
  # `linewidth` replaces `size` for lines (deprecated in ggplot2 3.4.0).
  geom_path(linewidth = 1.5, color = colorsReBu[6], alpha = 0.8) +
  labs(x = "Date", y = "Average Price", title = "Rectangle Line Plot", subtitle = "Line Plot", caption = "Kaggle:Avocado Dataset") +
  theme_light() +
  my_theme +
  scale_y_continuous(breaks = seq(0, 2, 0.25), labels = scales::dollar)

Standard Barplot

options(repr.plot.width = 16, repr.plot.height = 8)

# Horizontal bar chart of the number of pokemon per primary type, ordered by
# frequency, with reference lines at 30, 60 and 90.
pokemon %>% 
  # count() produces one row per Type.1 with the frequency in column `n`.
  count(Type.1) %>% 
  ggplot(aes(x = reorder(Type.1, n), y = n)) +
  # geom_col() is the direct way to draw bars whose heights are already
  # computed (equivalent to geom_bar(stat = "identity")).
  geom_col(aes(fill = n)) +
  coord_flip() +
  geom_label(aes(label = n), size = 4) +
  scale_fill_gradient(low = colorsPuYe[3], high = colorsPuYe[1], guide = "none") +
  # `linewidth` replaces `size` for lines (deprecated in ggplot2 3.4.0).
  geom_hline(yintercept = 30, color = colorsPuYe[4], alpha = 0.6, linewidth = 3) +
  geom_hline(yintercept = 60, color = colorsPuYe[5], alpha = 0.6, linewidth = 3) +
  geom_hline(yintercept = 90, color = colorsPuYe[6], alpha = 0.6, linewidth = 3) +
  theme_test() + 
  my_theme +
  labs(x = "Pokemon Type", y = "Frequency", title = "Simple Barplot", subtitle = "Bar Plot", caption = "Kaggle:Pokemon Dataset")

Clustered Barplot

options(repr.plot.width = 16, repr.plot.height = 8)

# Side-by-side bars counting pokemon per secondary type, split by the
# Legendary flag, with the count printed above each bar.
pokemon %>%  
  filter(Type.2 %in% c("Fire", "Dragon", "Fighting", "Fairy", "Ice")) %>% 
  ggplot(aes(x = Type.2, fill = Legendary)) +
  geom_bar(stat = "count", position = "dodge") +
  # after_stat(count) replaces the deprecated `..count..` notation.
  # The dodge width matches the bars' default width (0.9) so each label sits
  # centred over its own bar.
  geom_text(aes(label = after_stat(count)), stat = "count",
            position = position_dodge(width = 0.9), vjust = -0.2, size = 7) +
  scale_fill_manual(values = c(colorsPuYe[6], colorsPuYe[2])) +
  theme_classic() + 
  my_theme +
  labs(x = "Pokemon Type2", y = "Frequency", title = "Dodged Barplot", subtitle = "Bar Plot", caption = "Kaggle:Pokemon Dataset")

Stacked Barplot

options(repr.plot.width = 16, repr.plot.height = 8)

# Stacked bars counting pokemon per secondary type, split by the Legendary
# flag, with each segment's count printed at its vertical centre.
pokemon %>%  
  filter(Type.2 %in% c("Fire", "Dragon", "Fighting", "Fairy", "Ice", "Grass")) %>% 
  ggplot(aes(x = Type.2, fill = Legendary)) +
  geom_bar(stat = "count", position = "stack") +
  # after_stat(count) replaces the deprecated `..count..` notation;
  # position_stack(0.5) places the label halfway up each segment.
  geom_text(aes(label = after_stat(count)), stat = "count",
            position = position_stack(0.5), size = 7) +
  scale_fill_manual(values = c(colorsPuYe[6], colorsPuYe[2])) +
  theme_classic() + 
  my_theme +
  labs(x = "Pokemon Type2", y = "Frequency", title = "Stacked Barplot", subtitle = "Bar Plot", caption = "Kaggle:Pokemon Dataset")

Histogram

# Four ways to show the distribution of `Total`: binned area, density curve,
# dotplot and histogram.

p1 <- pokemon %>% 
  ggplot(aes(x = Total)) +
  # The bin count is stated explicitly (30 is the value stat_bin() falls back
  # to) instead of relying on the implicit default.
  geom_area(stat = "bin", bins = 30, color = "grey30", fill = colorsPastels[1]) +
  theme_test() +
  labs(title = "Area") +
  theme(axis.text = element_blank())

p2 <- pokemon %>% 
  ggplot(aes(x = Total)) +
  geom_density(color = "grey30", fill = colorsPastels[2]) +
  theme_test() +
  labs(title = "Density") +
  theme(axis.text = element_blank())

p3 <- pokemon %>% 
  ggplot(aes(x = Total)) +
  # geom_dotplot() has no `bins` parameter; only `binwidth` controls binning.
  geom_dotplot(color = "grey30", fill = colorsPastels[3], binwidth = 12) +
  theme_test() +
  labs(title = "Dotplot") +
  theme(axis.text = element_blank())

p4 <- pokemon %>% 
  ggplot(aes(x = Total)) +
  geom_histogram(color = "grey30", fill = colorsPastels[4], binwidth = 10) +
  theme_test() +
  labs(title = "Histogram") +
  theme(axis.text = element_blank())

options(repr.plot.width = 16, repr.plot.height = 8)
grid.arrange(p1, p2, p3, p4, ncol = 2)

Density Plot

options(repr.plot.width = 16, repr.plot.height = 8)

# Overlaid density curves of `Total` for four secondary pokemon types.
pokemon %>% 
  filter(Type.2 %in% c("Dragon", "Fighting", "Fairy", "Grass")) %>% 
  ggplot(aes(x = Total, fill = Type.2)) +
  # `linewidth` replaces `size` for the outline (deprecated in ggplot2 3.4.0).
  geom_density(linewidth = 0.8, alpha = 0.6) +
  theme_light() + 
  my_theme +
  labs(x = "Total", y = "Density", title = "Density Plot2", subtitle = "Histogram", caption = "Kaggle:Pokemon Dataset",
       fill = "Pokemon Type") +
  # NOTE(review): density values are shown with percent formatting; confirm
  # this labelling is intended.
  scale_y_continuous(labels = scales::percent) +
  scale_fill_manual(values = c(colorsPastels[2], colorsPastels[6], colorsPastels[5], colorsPastels[1]))

Boxplot and Violin Plot

# Three views of Defense against Attack: boxplot, dotplot and violin.

p1 <- pokemon %>% 
  ggplot(aes(x = Attack)) +
  # x is continuous, so the single box is requested explicitly with
  # `group = 1` instead of leaving ggplot2 to guess the grouping.
  # `linewidth` replaces `size` for the outline (deprecated in ggplot2 3.4.0).
  geom_boxplot(aes(y = Defense, group = 1), color = colorsRainbow[6], linewidth = 3) +
  theme_test() + 
  labs(title = "Boxplot") + 
  theme(axis.text = element_blank())

p2 <- pokemon %>% 
  ggplot(aes(x = Attack, y = Defense)) +
  # geom_dotplot() has no `bins` or `size` parameters; `binwidth` controls the
  # binning (and with it the dot diameter).
  geom_dotplot(binaxis = "y", stackdir = "center", color = colorsRainbow[2],
               fill = colorsRainbow[2], binwidth = 4) +
  theme_test() + 
  labs(title = "Dotplot") + 
  theme(axis.text = element_blank())

p3 <- pokemon %>% 
  ggplot(aes(x = Attack)) +
  geom_violin(aes(y = Defense), color = colorsRainbow[1], linewidth = 3) +
  theme_test() + 
  labs(title = "Violin") + 
  theme(axis.text = element_blank())

options(repr.plot.width = 16, repr.plot.height = 8)
# Boxplot fills the left column; dotplot and violin are stacked on the right.
grid.arrange(p1, p2, p3, layout_matrix = rbind(c(1, 2), c(1, 3)))

Grouped Boxplot

options(repr.plot.width = 16, repr.plot.height = 8)

# Boxplots of Defense against Attack, one box per primary pokemon type
# (the fill mapping splits the data into groups).
ggplot(pokemon, aes(x = Attack, y = Defense)) +
  geom_boxplot(
    aes(fill = Type.1),
    # Outliers: semi-transparent grey diamonds.
    outlier.fill = "grey35",
    outlier.shape = 18,
    outlier.alpha = 0.5,
    outlier.size = 2.5
  ) +
  theme_test() +
  my_theme +
  labs(
    x = "Attack",
    y = "Defense",
    title = "Multiple Boxplots",
    subtitle = "Box Plot",
    caption = "Kaggle:Pokemon Dataset",
    fill = "Pokemon Type"
  )

Violin + Dotplot

options(repr.plot.width = 16, repr.plot.height = 8)

# Violin of Defense with a centred dotplot on top and a horizontal marker
# line at Defense = 180.
pokemon %>% 
  ggplot(aes(x = Attack, y = Defense)) +
  # `linewidth` replaces `size` for the outline (deprecated in ggplot2 3.4.0);
  # FALSE is spelled out because `F` is reassignable.
  geom_violin(trim = FALSE, linewidth = 2, color = colorsRainbow[3]) +
  # geom_dotplot() has no `bins` parameter; only `binwidth` controls binning.
  geom_dotplot(binaxis = "y", stackdir = "center", color = colorsRainbow[4],
               fill = colorsRainbow[1], binwidth = 3.5) +
  theme_bw() + 
  my_theme +
  # The x axis shows the Attack column (the label previously read "Attach").
  labs(x = "Attack", y = "Defense", title = "Violin Boxplot", subtitle = "Box Plot", caption = "Kaggle:Pokemon Dataset") +
  # The marker is a fixed annotation, so it is drawn once with annotate()
  # rather than once per data row with geom_segment().
  annotate("segment", x = 50, y = 180, xend = 150, yend = 180,
           linewidth = 3.5, color = colorsRainbow[2])

Credit and Reference