Code
# Load the tidyverse, which attaches ggplot2, dplyr and the %>% pipe.
# Run the install line once if the package is not installed yet.
# install.packages("tidyverse")
library(tidyverse)

From Basic Layers to Multi-Dimensional Visuals
ggplot2 operates based on several layers. To create a complete graph, the following components are required:
# install.packages("tidyverse")
library(tidyverse)

# Optional exploration helpers, left commented out:
# data()    # list the datasets available in the attached packages
# ?women    # open the help page for the `women` dataset

# Preview the first six rows of the built-in `women` data frame.
head(women)
  height weight
1     58    115
2     59    117
3     60    120
4     61    123
5     62    126
6     63    129
Here, we have only performed the data and axis mapping. This creates the canvas but no visual marks.
# Data and aesthetic mapping only: this draws the axes but no marks.
ggplot(data = women,
       mapping = aes(x = weight,
                     y = height))

# Add geometry layers: points plus a line connecting them.
ggplot(data = women,
       mapping = aes(x = weight,
                     y = height)) +
  geom_point() +
  geom_line()

# Same plot, relying on argument position (data, mapping; x, y).
ggplot(women, aes(weight, height)) +
  geom_point() +
  geom_line()

# Same plot, piping the data frame into ggplot().
women %>%
  ggplot(aes(weight, height)) +
  geom_point() +
  geom_line()

Line Graphs are ideal for showing changes over a continuous scale.
# Larger points with a red connecting line.
women %>%
  ggplot(aes(weight, height)) +
  geom_point(size = 3) +
  geom_line(color = "red")

# Modern method using Piping
women %>%
  ggplot(aes(x = weight, y = height)) +
  geom_point(size = 3, color = "darkblue") + # Adding points
  geom_line(color = "red", linewidth = 1) +  # Adding lines
  labs(title = "Height vs Weight Relationship",
       x = "Weight", y = "Height") +
  theme_minimal()

Tip: Always use geom_point() alongside geom_line(); this helps in understanding the exact position of each data point.
Boxplots tell us whether there are outliers in the data and how spread out the data is.
# view(chickwts)

# Column names of the built-in `chickwts` data frame.
names(chickwts)
[1] "weight" "feed"
# Basic boxplot: weight on the x axis, one box per feed type on the y axis.
chickwts %>%
  ggplot(aes(weight, feed)) +
  geom_boxplot()

# Fill each box according to its feed type.
chickwts %>%
  ggplot(aes(weight, feed,
             fill = feed)) +
  geom_boxplot()

chickwts %>%
  ggplot(aes(weight, feed,
             fill = feed)) +
  geom_boxplot(alpha = 0.6) # Increasing color transparency

# Add a theme and readable axis labels.
chickwts %>%
  ggplot(aes(weight, feed, fill = feed)) +
  geom_boxplot(alpha = 0.6) +
  theme_test() +
  labs(x = "Chicken Weight",
       y = "Chicken Feeds")

# Final version with the axes swapped: feed on x, weight on y.
chickwts %>%
  ggplot(aes(x = feed, y = weight, fill = feed)) +
  geom_boxplot(alpha = 0.6) + # Increasing color transparency
  theme_test() +
  labs(title = "Chicken Weight by Feed Type",
       x = "Feed Type", y = "Weight") +
  theme(legend.position = "none") # Separate legend is not required

Bar charts are generally used to show the count or frequency of categorical data.
# View(starwars)

# Column names of the `starwars` tibble (shipped with dplyr).
names(starwars)
 [1] "name"       "height"     "mass"       "hair_color" "skin_color"
 [6] "eye_color"  "birth_year" "sex"        "gender"     "homeworld"
[11] "species"    "films"      "vehicles"   "starships"
# Count characters per eye colour, restricted to four colours and to rows
# where both eye_color and gender are present.
starwars %>%
  drop_na(eye_color, gender) %>%
  filter(eye_color %in% c("black", "brown",
                          "blue", "yellow")) %>%
  ggplot(aes(eye_color)) +
  geom_bar()

# Split each bar by gender using the fill aesthetic (stacked by default).
starwars %>%
  drop_na(eye_color, gender) %>%
  filter(eye_color %in% c("black", "brown",
                          "blue", "yellow")) %>%
  ggplot(aes(eye_color, fill = gender)) +
  geom_bar()

starwars %>%
  drop_na(eye_color, gender) %>%
  filter(eye_color %in% c("black", "brown",
                          "blue", "yellow")) %>%
  ggplot(aes(eye_color, fill = gender)) +
  geom_bar(alpha = .5) # Increasing color transparency

# Add a theme, a title and axis labels.
starwars %>%
  drop_na(eye_color, gender) %>%
  filter(eye_color %in% c("black", "brown",
                          "blue", "yellow")) %>%
  ggplot(aes(eye_color, fill = gender)) +
  geom_bar(alpha = .5) +
  theme_test() +
  labs(title = "Simple Bar-Chart",
       x = "Eye Colour",
       y = "Count")

# Stacked bar chart with the legend moved above the plot.
starwars %>%
  drop_na(eye_color, gender) %>%
  filter(eye_color %in% c("black", "brown",
                          "blue", "yellow")) %>%
  ggplot(aes(eye_color, fill = gender)) +
  geom_bar(stat = "count", alpha = .5) +
  theme_test() +
  labs(title = "Stacked Bar-Chart",
       x = "Eye Colour",
       y = "Count") +
  theme(legend.position = "top")

# Grouped Bar Chart (Dodge Position)
# Dodged bars: the genders are drawn side by side within each eye colour.
starwars %>%
  drop_na(eye_color, gender) %>%
  filter(eye_color %in% c("black", "brown",
                          "blue", "yellow")) %>%
  ggplot(aes(eye_color, fill = gender)) +
  geom_bar(stat = "count", alpha = 0.5,
           position = "dodge") +
  theme_test() +
  labs(title = "Grouped Bar-Chart",
       x = "Eye Colour",
       y = "Count") +
  # Keep the fill legend: without it the side-by-side bars cannot be matched
  # to a gender, which defeats the purpose of the comparison.
  theme(legend.position = "top")

Difference: Using position = "stack" places bars on top of each other, while position = "dodge" places them side-by-side to make comparison easier.
This is the most powerful part of ggplot2. Here, using the gapminder dataset, we will display 4-5 variables simultaneously.
# Load the gapminder dataset package; run the install line once if needed.
# install.packages('gapminder')
library(gapminder)

# View(gapminder)

# Preview the first eight rows.
head(gapminder, 8)
# A tibble: 8 × 6
  country     continent  year lifeExp      pop gdpPercap
  <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
1 Afghanistan Asia       1952    28.8  8425333      779.
2 Afghanistan Asia       1957    30.3  9240934      821.
3 Afghanistan Asia       1962    32.0 10267083      853.
4 Afghanistan Asia       1967    34.0 11537966      836.
5 Afghanistan Asia       1972    36.1 13079460      740.
6 Afghanistan Asia       1977    38.4 14880372      786.
7 Afghanistan Asia       1982    39.9 12881816      978.
8 Afghanistan Asia       1987    40.8 13867957      852.
# Basic scatter plot: life expectancy against GDP per capita for Asia and
# Europe, keeping only rows with gdpPercap below 30000.
gapminder %>%
  filter(continent %in% c("Asia", "Europe")) %>%
  filter(gdpPercap < 30000) %>%
  ggplot(aes(gdpPercap, lifeExp)) +
  geom_point()

# Map two more variables: point size to population, colour to year.
gapminder %>%
  filter(continent %in% c("Asia", "Europe")) %>%
  filter(gdpPercap < 30000) %>%
  ggplot(aes(gdpPercap, lifeExp,
             size = pop,
             color = year)) +
  geom_point()

# Add a theme, a title and axis labels.
gapminder %>%
  filter(continent %in% c("Asia", "Europe")) %>%
  filter(gdpPercap < 30000) %>%
  ggplot(aes(gdpPercap, lifeExp,
             size = pop,
             color = year)) +
  geom_point() +
  theme_test() +
  labs(title = "Life expectancy explained by GDP per capita",
       x = "GDP per capita",
       y = "Life expectancy")

# Semi-transparent points and a viridis colour scale. This plot has no
# facets, so the subtitle describes only the size encoding.
gapminder %>%
  filter(continent %in% c("Asia", "Europe"), gdpPercap < 30000) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, size = pop, color = year)) +
  geom_point(alpha = 0.6) +
  theme_test() +
  scale_color_viridis_c() + # Beautiful color gradient
  labs(title = "Life Expectancy vs GDP per Capita",
       subtitle = "Size = Population",
       x = "GDP per Capita",
       y = "Life Expectancy")

# Final version: the same plot split into one panel per continent.
gapminder %>%
  filter(continent %in% c("Asia", "Europe"), gdpPercap < 30000) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, size = pop, color = year)) +
  geom_point(alpha = 0.6) +
  theme_test() +
  scale_color_viridis_c() + # Beautiful color gradient
  labs(title = "Life Expectancy vs GDP per Capita",
       subtitle = "Faceted by Continent | Size = Population",
       x = "GDP per Capita",
       y = "Life Expectancy") +
  facet_wrap(~continent) # Separate panels for Asia and Europe

aes(): Creates the skeleton of the graph (Mapping).
geom_***(): Creates the muscle or visible parts of the graph (Geometry).
facet_wrap(): Divides a large graph into small, meaningful sections (Faceting).
theme_***(): Makes the graph visually pleasing or professional (Styling).

Excellent guide! You have now mastered all the steps of data visualization through ggplot2, from basic to intermediate levels.