# The plots below use the Avocado and Pokemon datasets available on Kaggle. (I suggest not trying to interpret them; they are used purely as examples.)
setwd('~/Asdos/VDE')
library(ggplot2) # plot library
## Warning: package 'ggplot2' was built under R version 4.3.3
library(tidyverse) # for data manipulation
## Warning: package 'tidyr' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(gridExtra) # multiple plots in 1
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library(magick) # attach dope image for visual
## Warning: package 'magick' was built under R version 4.3.3
## Linking to ImageMagick 6.9.12.98
## Enabled features: cairo, freetype, fftw, ghostscript, heic, lcms, pango, raw, rsvg, webp
## Disabled features: fontconfig, x11
library(scales) # show the colors
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(ggrepel) # for graph repel (labels)
## Warning: package 'ggrepel' was built under R version 4.3.3
library(hexbin) # for hive scatter
## Warning: package 'hexbin' was built under R version 4.3.3
library(naniar) # to check for missing data
## Warning: package 'naniar' was built under R version 4.3.3
# Predefined personal color schemes (change them at your convenience).
# Each palette is a vector of six hex colors; the plots below pick entries
# from them by position (e.g. colorsEarth[3]).
colorsReBu <- c("#922B21", "#EE865D", "#DDCD5E", "#59BEC4", "#048B9F", "#114676")
colorsPuYe <- c("#5C374C", "#985277", "#CE6A85", "#FFCF6A", "#FFB742", "#E9692C")
colorsEarth <- c("#DA4511", "#FFBD00", "#6A953F", "#9A6233", "#D3AE7C", "#307CA1")
colorsRainbow <- c("#FF8EC8", "#FFDF51", "#46DBDF", "#FF8F46", "#42BAB7", "#DB0000")
colorsPastels <- c("#FA6E4F", "#F2CF59", "#FB8E7E", "#C5D7C0", "#8EC9BB", "#F8CA9D")

# Preview every palette as a grid of swatches, in the order defined above.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned; invisible() keeps the list returned by lapply() from printing.
invisible(lapply(
  list(colorsReBu, colorsPuYe, colorsEarth, colorsRainbow, colorsPastels),
  show_col,
  labels = FALSE,
  borders = NA
))

# Importing the data (file names are resolved against the working directory)
avocado <- read.csv("avocado.csv")
pokemon <- read.csv("Pokemon.csv")
# Build one histogram of `Total` from the pokemon data, styled with a given
# complete theme. The six panels differ only by theme and title, so the shared
# layers live here instead of being copy-pasted.
#   theme_fn: a ggplot2 theme constructor, e.g. theme_classic
#   title:    text used as the plot title
# Returns a ggplot object. Axis text is blanked so only the theme stands out.
theme_demo_hist <- function(theme_fn, title) {
  ggplot(pokemon, aes(x = Total)) +
    geom_histogram(bins = 7, fill = "orange", alpha = 0.6, color = "grey35") +
    theme_fn() +
    theme(axis.text = element_blank()) +
    labs(title = title)
}

p1 <- theme_demo_hist(theme_classic, "theme_classic()")
p2 <- theme_demo_hist(theme_light, "theme_light()")
p3 <- theme_demo_hist(theme_bw, "theme_bw()")
p4 <- theme_demo_hist(theme_dark, "theme_dark()")
p5 <- theme_demo_hist(theme_minimal, "theme_minimal()")
p6 <- theme_demo_hist(theme_test, "theme_test()")

# Lay the six themed histograms out in a 2 x 3 grid.
options(repr.plot.width = 16, repr.plot.height = 8)
grid.arrange(p1, p2, p3, p4, p5, p6, ncol = 3)
# Shared theme tweaks added on top of a complete theme (theme_classic(),
# theme_light(), ...) in the full-size plots below: grey text, larger title
# and axis fonts, a heavier axis line and a framed legend box.
# Line thickness is given as `linewidth`: the `size` argument of
# element_line() and element_rect() is deprecated as of ggplot2 3.4.0.
my_theme <- theme(
  text = element_text(color = "grey35"),
  plot.title = element_text(size = 25, face = "bold"),
  axis.title = element_text(size = 20),
  axis.text = element_text(size = 15),
  axis.line = element_line(linewidth = 1.2, color = "grey35"),
  legend.box.background = element_rect(color = "grey75", linewidth = 1),
  legend.box.margin = margin(t = 5, r = 5, b = 5, l = 5),
  legend.title = element_text(face = "bold", size = 15),
  legend.text = element_text(size = 13)
)
# Build one Defense-vs-Attack scatter of the first 70 pokemon rows drawn with
# a given point shape. The six panels differ only by shape and color, so the
# shared layers live here instead of being copy-pasted.
#   shape_id:      ggplot2 point shape number; also used in the title
#   palette_index: position in colorsEarth of the point color
# Returns a ggplot object. Axis text is blanked so only the shape stands out.
shape_demo_scatter <- function(shape_id, palette_index) {
  ggplot(head(pokemon, 70), aes(x = Defense, y = Attack)) +
    geom_point(
      shape = shape_id, size = 5, stroke = 1.3,
      color = colorsEarth[palette_index]
    ) +
    theme_test() +
    theme(axis.text = element_blank()) +
    labs(x = "Defense", y = "Attack", title = paste0("Shape", shape_id))
}

p1 <- shape_demo_scatter(1, 1)
p2 <- shape_demo_scatter(2, 2)
p10 <- shape_demo_scatter(10, 3)
p4 <- shape_demo_scatter(4, 4)
p18 <- shape_demo_scatter(18, 5)
p8 <- shape_demo_scatter(8, 6)

# Lay the six shape samples out in a 2 x 3 grid.
options(repr.plot.width = 16, repr.plot.height = 8)
grid.arrange(p1, p2, p10, p4, p18, p8, ncol = 3)
# Basic scatter plot: Defense against Attack for every pokemon, with point
# color taken from Generation (as a factor) and point shape from Legendary.
options(repr.plot.width = 16, repr.plot.height = 8)
ggplot(data = pokemon, mapping = aes(x = Defense, y = Attack)) +
  geom_point(
    mapping = aes(color = as.factor(Generation), shape = Legendary),
    alpha = 0.5,
    stroke = 1.5,
    size = 5
  ) +
  # Axis ticks every 25 units; one colorsEarth entry per Generation level.
  scale_x_continuous(breaks = seq(from = 0, to = 250, by = 25)) +
  scale_y_continuous(breaks = seq(from = 0, to = 200, by = 25)) +
  scale_color_manual(values = colorsEarth) +
  # my_theme must come after the complete theme so its tweaks are kept.
  theme_classic() +
  my_theme +
  labs(
    title = "Basic Plot",
    subtitle = "Scatter Plot",
    caption = "Kaggle:Pokemon Dataset",
    x = "Defense",
    y = "Attack",
    color = "Generation",
    shape = "Legendary"
  )
# Bubble plot: HP against Special Attack for the legendary pokemon (at most
# the 150 with the highest Total). Bubble color follows HP and bubble area
# follows Special Attack.
options(repr.plot.width = 16, repr.plot.height = 8)
pokemon %>%
  filter(Legendary == "True") %>%
  arrange(desc(Total)) %>%
  head(n = 150) %>%
  ggplot(mapping = aes(x = HP, y = Sp..Atk)) +
  geom_point(mapping = aes(color = HP, size = Sp..Atk), alpha = 0.7) +
  # Diverging color ramp centred on HP = 100.
  scale_color_gradient2(
    low = colorsEarth[5],
    mid = colorsEarth[2],
    high = colorsEarth[1],
    midpoint = 100
  ) +
  scale_size(range = c(1, 20)) +
  # Theme pieces are applied in this order: base theme, shared tweaks, border.
  theme_bw() +
  my_theme +
  theme(panel.border = element_rect(color = "grey35")) +
  labs(
    title = "Bubble Plot",
    subtitle = "Scatter Plot",
    caption = "Kaggle:Pokemon Dataset",
    x = "HP",
    y = "Special Attack",
    color = "HP",
    size = "Special Attack"
  )
# Eruption plot: Defense against Attack for every pokemon, overlaid with 2D
# density contours and a single arrow from (48, 52) to (175, 175).
options(repr.plot.width = 16, repr.plot.height = 8)
pokemon %>%
  ggplot(aes(x = Defense, y = Attack)) +
  geom_point(alpha = 0.65, size = 5, color = colorsEarth[5]) +
  # Line thickness is `linewidth`; `size` for lines is deprecated as of
  # ggplot2 3.4.0.
  geom_density2d(
    lineend = "round", linejoin = "round", n = 200, linewidth = 1,
    color = colorsEarth[3], alpha = 0.85
  ) +
  theme_light() +
  labs(
    x = "Defense", y = "Attack", title = "Eruption Plot",
    subtitle = "Scatter Plot", caption = "Kaggle:Pokemon Dataset"
  ) +
  my_theme +
  theme(panel.border = element_rect(color = "grey35")) +
  # annotate() draws the arrow once. A geom_segment() with constant aesthetics
  # would inherit the plot data and redraw the same arrow for every row.
  annotate(
    "segment",
    x = 48, y = 52, xend = 175, yend = 175,
    color = colorsEarth[3], linewidth = 1.3,
    arrow = arrow(length = unit(0.5, "cm"))
  )
# ggplot()
### Transform 'Date' column from factor into Date type
# Parse the Date column (ISO "YYYY-MM-DD" text) into a real Date so the plots
# below get a proper date axis and date comparisons work.
avocado <- avocado %>%
  mutate(Date = as.Date(Date, format = "%Y-%m-%d"))

# Mean price, volume and bag count per date, region and avocado type.
# `.groups = "drop_last"` states the summarise() default explicitly: the result
# stays grouped by Date and region, exactly as before, but the "has grouped
# output by" message is no longer emitted.
averaged_avocado <- avocado %>%
  group_by(Date, region, type) %>%
  summarise(
    AveragePrice = mean(AveragePrice),
    Total.Volume = mean(Total.Volume),
    Total.Bags = mean(Total.Bags),
    .groups = "drop_last"
  )

# Peek at the first rows of the summary.
averaged_avocado %>% head(4)
## # A tibble: 4 × 6
## # Groups: Date, region [2]
## Date region type AveragePrice Total.Volume Total.Bags
## <date> <chr> <chr> <dbl> <dbl> <dbl>
## 1 2015-01-04 Albany conventional 1.22 40873. 9716.
## 2 2015-01-04 Albany organic 1.79 1374. 1163.
## 3 2015-01-04 Atlanta conventional 1 435021. 46816.
## 4 2015-01-04 Atlanta organic 1.76 3847. 1408.
# Conventional avocados in San Francisco, January through 1 June 2015.
# The bounds are explicit Dates rather than strings compared against a Date.
data <- avocado %>%
  filter(
    region == "SanFrancisco",
    type == "conventional",
    Date >= as.Date("2015-01-01"),
    Date <= as.Date("2015-06-01")
  )

# Build one Total.Volume-over-Date panel drawn with a given geom and linetype.
# The six panels differ only by those two settings and the title.
#   df:       data frame with Date and Total.Volume columns
#   geom_fn:  layer constructor, geom_line or geom_step
#   linetype: linetype name, e.g. "dotted"
#   title:    text used as the plot title
# Returns a ggplot object. Line thickness is `linewidth`; `size` for lines is
# deprecated as of ggplot2 3.4.0.
linetype_demo <- function(df, geom_fn, linetype, title) {
  ggplot(df, aes(x = Date)) +
    geom_fn(
      aes(y = Total.Volume),
      linewidth = 2, color = colorsReBu[6], linetype = linetype
    ) +
    theme_test() +
    theme(axis.text = element_blank()) +
    labs(title = title)
}

p10 <- linetype_demo(data, geom_line, "dotted", "Dotted Line")
p11 <- linetype_demo(data, geom_line, "dashed", "Dashed Line")
p12 <- linetype_demo(data, geom_line, "longdash", "Longdash Line")
p20 <- linetype_demo(data, geom_step, "solid", "Solid Line")
p21 <- linetype_demo(data, geom_step, "dotdash", "Dotdash Line")
p22 <- linetype_demo(data, geom_step, "twodash", "Twodash Line")

# Top row: geom_line variants; bottom row: geom_step variants.
options(repr.plot.width = 16, repr.plot.height = 8)
grid.arrange(p10, p11, p12, p20, p21, p22, ncol = 3)
# geom_smooth()
# Basic line plot: weekly Total.Volume of conventional avocados in San
# Francisco, with a loess trend line on top.
options(repr.plot.width = 16, repr.plot.height = 8)
avocado %>%
  filter(region == "SanFrancisco", type == "conventional") %>%
  ggplot(aes(x = Date)) +
  # Line thickness is `linewidth`; `size` for lines is deprecated as of
  # ggplot2 3.4.0.
  geom_line(aes(y = Total.Volume), linewidth = 1.5, color = colorsReBu[5]) +
  labs(
    x = "Date", y = "Total Volume", title = "Basic Line Plot",
    subtitle = "Line Plot", caption = "Kaggle:Avocado Dataset"
  ) +
  theme_light() +
  my_theme +
  theme(panel.border = element_rect(color = "grey35")) +
  # The y axis is a volume, not money, so use thousands separators instead of
  # a currency format.
  scale_y_continuous(
    breaks = seq(0, 2000000, 200000),
    labels = scales::label_comma()
  ) +
  # method and formula are the values ggplot2 previously picked on its own
  # (and reported in a message); stating them keeps the smoother reproducible.
  geom_smooth(
    aes(y = Total.Volume),
    method = "loess", formula = y ~ x,
    color = colorsReBu[2], se = FALSE, span = 0.4
  )
# Area plot: Total.Volume of conventional avocados over time, with one filled
# area per region (San Francisco and Los Angeles).
options(repr.plot.width = 16, repr.plot.height = 8)
avocado %>%
  filter(
    region %in% c("SanFrancisco", "LosAngeles"),
    type == "conventional"
  ) %>%
  ggplot(aes(x = Date)) +
  geom_area(aes(y = Total.Volume, fill = region), alpha = 0.9) +
  labs(
    x = "Date", y = "Total Volume", title = "Area Plot",
    subtitle = "Line Plot", caption = "Kaggle:Avocado Dataset"
  ) +
  theme_light() +
  my_theme +
  theme(panel.border = element_rect(color = "grey35")) +
  # The y axis is a volume, not money, so use thousands separators instead of
  # a currency format.
  scale_y_continuous(
    breaks = seq(0, 7000000, 1000000),
    labels = scales::label_comma()
  ) +
  scale_fill_manual(values = c(colorsReBu[1], colorsReBu[4]))
# Rectangle line plot: average price of conventional avocados in Los Angeles
# over time, with two date ranges highlighted by full-height bands.
options(repr.plot.width = 16, repr.plot.height = 8)
data <- avocado %>%
  filter(region == "LosAngeles", type == "conventional") %>%
  group_by(Date) %>%
  summarise(AveragePrice = mean(AveragePrice))

data %>%
  ggplot(aes(x = Date, y = AveragePrice)) +
  # annotate() draws each band once. A geom_rect() with constant aesthetics
  # would inherit the plot data and redraw the same band for every row.
  # ymin/ymax of -Inf/Inf stretch the band over the whole panel height.
  annotate(
    "rect",
    xmin = as.Date("2017-08-01"), xmax = as.Date("2017-11-01"),
    ymin = -Inf, ymax = Inf, fill = colorsReBu[2]
  ) +
  annotate(
    "rect",
    xmin = as.Date("2016-09-01"), xmax = as.Date("2016-12-01"),
    ymin = -Inf, ymax = Inf, fill = colorsReBu[2]
  ) +
  # Line thickness is `linewidth`; `size` for lines is deprecated as of
  # ggplot2 3.4.0.
  geom_path(linewidth = 1.5, color = colorsReBu[6], alpha = 0.8) +
  labs(
    x = "Date", y = "Average Price", title = "Rectangle Line Plot",
    subtitle = "Line Plot", caption = "Kaggle:Avocado Dataset"
  ) +
  theme_light() +
  my_theme +
  scale_y_continuous(breaks = seq(0, 2, 0.25), labels = scales::dollar)
# Simple barplot: number of pokemon per primary type, bars ordered by count
# and flipped to horizontal, with reference lines at 30, 60 and 90.
options(repr.plot.width = 16, repr.plot.height = 8)
pokemon %>%
  # count() yields one row per Type.1 with the row count in column `n`.
  count(Type.1) %>%
  ggplot(aes(x = reorder(Type.1, n), y = n)) +
  # geom_col() plots the precomputed counts as bar heights.
  geom_col(aes(fill = n)) +
  coord_flip() +
  geom_label(aes(label = n), size = 4) +
  scale_fill_gradient(low = colorsPuYe[3], high = colorsPuYe[1], guide = "none") +
  # Line thickness is `linewidth`; `size` for lines is deprecated as of
  # ggplot2 3.4.0.
  geom_hline(yintercept = 30, color = colorsPuYe[4], alpha = 0.6, linewidth = 3) +
  geom_hline(yintercept = 60, color = colorsPuYe[5], alpha = 0.6, linewidth = 3) +
  geom_hline(yintercept = 90, color = colorsPuYe[6], alpha = 0.6, linewidth = 3) +
  theme_test() +
  my_theme +
  labs(
    x = "Pokemon Type", y = "Frequency", title = "Simple Barplot",
    subtitle = "Bar Plot", caption = "Kaggle:Pokemon Dataset"
  )
# Dodged barplot: number of pokemon per secondary type (five selected types),
# with side-by-side bars for legendary and non-legendary and the count printed
# above each bar.
options(repr.plot.width = 16, repr.plot.height = 8)
pokemon %>%
  filter(Type.2 %in% c("Fire", "Dragon", "Fighting", "Fairy", "Ice")) %>%
  ggplot(aes(x = Type.2, fill = Legendary)) +
  geom_bar(stat = "count", position = position_dodge(width = 0.9)) +
  # after_stat(count) replaces the `..count..` notation deprecated in
  # ggplot2 3.4.0. The labels use the same dodge width as the bars so each
  # number sits centred over its own bar.
  geom_text(
    aes(label = after_stat(count)),
    stat = "count", position = position_dodge(width = 0.9),
    vjust = -0.2, size = 7
  ) +
  scale_fill_manual(values = c(colorsPuYe[6], colorsPuYe[2])) +
  theme_classic() +
  my_theme +
  labs(
    x = "Pokemon Type2", y = "Frequency", title = "Dodged Barplot",
    subtitle = "Bar Plot", caption = "Kaggle:Pokemon Dataset"
  )
# Stacked barplot: number of pokemon per secondary type (six selected types),
# with legendary and non-legendary stacked and the count printed in the middle
# of each segment.
options(repr.plot.width = 16, repr.plot.height = 8)
pokemon %>%
  filter(Type.2 %in% c("Fire", "Dragon", "Fighting", "Fairy", "Ice", "Grass")) %>%
  ggplot(aes(x = Type.2, fill = Legendary)) +
  geom_bar(stat = "count", position = "stack") +
  # after_stat(count) replaces the `..count..` notation deprecated in
  # ggplot2 3.4.0. vjust = 0.5 places each label halfway up its segment.
  geom_text(
    aes(label = after_stat(count)),
    stat = "count", position = position_stack(vjust = 0.5), size = 7
  ) +
  scale_fill_manual(values = c(colorsPuYe[6], colorsPuYe[2])) +
  theme_classic() +
  my_theme +
  labs(
    x = "Pokemon Type2", y = "Frequency", title = "Stacked Barplot",
    subtitle = "Bar Plot", caption = "Kaggle:Pokemon Dataset"
  )
# Histogram
# Four ways to show the distribution of `Total`; axis text is blanked so only
# the geometry differs between panels.

# Binned area. `bins = 30` is the value stat_bin() previously fell back to
# (and reported in a message); stating it keeps the binning explicit.
p1 <- pokemon %>%
  ggplot(aes(x = Total)) +
  geom_area(stat = "bin", bins = 30, color = "grey30", fill = colorsPastels[1]) +
  theme_test() +
  labs(title = "Area") +
  theme(axis.text = element_blank())

# Kernel density estimate.
p2 <- pokemon %>%
  ggplot(aes(x = Total)) +
  geom_density(color = "grey30", fill = colorsPastels[2]) +
  theme_test() +
  labs(title = "Density") +
  theme(axis.text = element_blank())

# Dot plot. Binning is controlled by `binwidth` only; geom_dotplot() has no
# `bins` parameter, so the previously passed value was ignored with a warning.
p3 <- pokemon %>%
  ggplot(aes(x = Total)) +
  geom_dotplot(color = "grey30", fill = colorsPastels[3], binwidth = 12) +
  theme_test() +
  labs(title = "Dotplot") +
  theme(axis.text = element_blank())

# Classic histogram with bins 10 units wide.
p4 <- pokemon %>%
  ggplot(aes(x = Total)) +
  geom_histogram(color = "grey30", fill = colorsPastels[4], binwidth = 10) +
  theme_test() +
  labs(title = "Histogram") +
  theme(axis.text = element_blank())

# Lay the four panels out in a 2 x 2 grid.
options(repr.plot.width = 16, repr.plot.height = 8)
grid.arrange(p1, p2, p3, p4, ncol = 2)
# Density plot: distribution of `Total` for four selected secondary types,
# drawn as overlapping semi-transparent densities.
options(repr.plot.width = 16, repr.plot.height = 8)
pokemon %>%
  filter(Type.2 %in% c("Dragon", "Fighting", "Fairy", "Grass")) %>%
  ggplot(aes(x = Total, fill = Type.2)) +
  # Outline thickness is `linewidth`; `size` for lines is deprecated as of
  # ggplot2 3.4.0.
  geom_density(linewidth = 0.8, alpha = 0.6) +
  theme_light() +
  my_theme +
  labs(
    x = "Total", y = "Density", title = "Density Plot2", subtitle = "Histogram",
    caption = "Kaggle:Pokemon Dataset", fill = "Pokemon Type"
  ) +
  # NOTE(review): density values are shown with a percent format here; confirm
  # that is the intended reading of the y axis.
  scale_y_continuous(labels = scales::percent) +
  scale_fill_manual(
    values = c(colorsPastels[2], colorsPastels[6], colorsPastels[5], colorsPastels[1])
  )
# Three ways to show the spread of Defense; axis text is blanked so only the
# geometry differs between panels.

# Single boxplot. x is continuous, so `group = 1` states explicitly that all
# rows form one box; without it ggplot2 warns "Continuous x aesthetic".
# Outline thickness is `linewidth` (`size` for lines is deprecated as of
# ggplot2 3.4.0).
p1 <- pokemon %>%
  ggplot(aes(x = Attack)) +
  geom_boxplot(
    aes(y = Defense, group = 1),
    color = colorsRainbow[6], linewidth = 3
  ) +
  theme_test() +
  labs(title = "Boxplot") +
  theme(axis.text = element_blank())

# Dot plot binned along y and stacked symmetrically. geom_dotplot() has no
# `bins` or `size` parameter, so those previously ignored arguments are gone;
# binning is controlled by `binwidth`.
p2 <- pokemon %>%
  ggplot(aes(x = Attack, y = Defense)) +
  geom_dotplot(
    binaxis = "y", stackdir = "center",
    color = colorsRainbow[2], fill = colorsRainbow[2], binwidth = 4
  ) +
  theme_test() +
  labs(title = "Dotplot") +
  theme(axis.text = element_blank())

# Violin plot.
p3 <- pokemon %>%
  ggplot(aes(x = Attack)) +
  geom_violin(aes(y = Defense), color = colorsRainbow[1], linewidth = 3) +
  theme_test() +
  labs(title = "Violin") +
  theme(axis.text = element_blank())

# Boxplot fills the left column; dotplot and violin share the right column.
options(repr.plot.width = 16, repr.plot.height = 8)
grid.arrange(p1, p2, p3, layout_matrix = rbind(c(1, 2), c(1, 3)))
# Multiple boxplots: Defense boxes filled by primary pokemon type, with
# outliers drawn as small semi-transparent diamonds (shape 18).
options(repr.plot.width = 16, repr.plot.height = 8)
ggplot(data = pokemon, mapping = aes(x = Attack, y = Defense)) +
  geom_boxplot(
    mapping = aes(fill = Type.1),
    outlier.shape = 18,
    outlier.size = 2.5,
    outlier.alpha = 0.5,
    outlier.fill = "grey35"
  ) +
  # my_theme must come after the complete theme so its tweaks are kept.
  theme_test() +
  my_theme +
  labs(
    title = "Multiple Boxplots",
    subtitle = "Box Plot",
    caption = "Kaggle:Pokemon Dataset",
    x = "Attack",
    y = "Defense",
    fill = "Pokemon Type"
  )
# Violin + dotplot: untrimmed violin of Defense with the individual
# observations stacked on top as dots, plus one horizontal marker line at
# Defense = 180 spanning Attack 50 to 150.
options(repr.plot.width = 16, repr.plot.height = 8)
pokemon %>%
  ggplot(aes(x = Attack, y = Defense)) +
  # Outline thickness is `linewidth`; `size` for lines is deprecated as of
  # ggplot2 3.4.0.
  geom_violin(trim = FALSE, linewidth = 2, color = colorsRainbow[3]) +
  # geom_dotplot() has no `bins` parameter (it was ignored with a warning);
  # binning is controlled by `binwidth`.
  geom_dotplot(
    binaxis = "y", stackdir = "center",
    color = colorsRainbow[4], fill = colorsRainbow[1], binwidth = 3.5
  ) +
  theme_bw() +
  my_theme +
  # The x axis shows Attack; the label previously read "Attach".
  labs(
    x = "Attack", y = "Defense", title = "Violin Boxplot",
    subtitle = "Box Plot", caption = "Kaggle:Pokemon Dataset"
  ) +
  # annotate() draws the marker once instead of once per data row.
  annotate(
    "segment",
    x = 50, y = 180, xend = 150, yend = 180,
    linewidth = 3.5, color = colorsRainbow[2]
  )