Pie charts show how a whole is divided into parts.
Each slice represents the proportion of a category relative to the total.
They are commonly used when:
However, pie charts can become difficult to interpret when there are many slices.
We will use the mpg dataset and count the number of vehicles in each class.
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the mpg data and tally the number of vehicles in each class.
data("mpg")
class_counts <- count(mpg, class)

# A single stacked column (x = "") wrapped into polar coordinates on the
# y axis gives one slice per vehicle class.
ggplot(data = class_counts, mapping = aes(x = "", y = n, fill = class)) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  labs(title = "Vehicle Class Distribution") +
  theme_void()
Explanation
# Slice sizes for five groups.
Prop <- c(3, 7, 9, 1, 2)
slice_names <- c("Gr-A", "Gr-B", "Gr-C", "Gr-D", "Gr-E")

# Plain pie chart with default labels.
pie(Prop)
# Same chart with a label on each slice.
pie(Prop, labels = slice_names)
# Shade the slices with lines: `density` controls the shading lines and
# `angle` supplies one line angle per slice.
pie(
  Prop,
  labels = slice_names,
  density = 10,
  angle = c(20, 90, 30, 10, 0)
)
Donut charts are similar to pie charts but include a hole in the center. This can make labels easier to read and reduce visual clutter.
# Donut chart: the stacked column sits at x = 2 and the x limits are widened
# to 0.5-2.5, so the unused inner range becomes the hole once the plot is
# wrapped into polar coordinates.
ggplot(data = class_counts, mapping = aes(x = 2, y = n, fill = class)) +
  geom_col(width = 1) +
  xlim(0.5, 2.5) +
  coord_polar(theta = "y") +
  labs(title = "Vehicle Class Distribution (Donut Chart)") +
  theme_void()
Explanation
The donut hole is created by manipulating the x-axis limits.
To position percentage labels around the donut, you need to:
library(ggplot2)
library(dplyr)
library(scales)
# Example data: three categories and their values.
df <- data.frame(
  category = c("A", "B", "C"),
  value = c(10, 60, 30)
)

# Derive the geometry of each slice on a 0-1 scale, plus its label text.
df <- mutate(
  df,
  prop = value / sum(value),        # share of the total
  ymax = cumsum(prop),              # where each slice ends
  ymin = lag(ymax, default = 0),    # where each slice begins
  label_pos = (ymin + ymax) / 2,    # midpoint of the slice arc
  label = percent(prop)             # share formatted as a percentage
)

# Ring drawn as rectangles between x = 3 and x = 4; the percentage labels
# are placed just outside the ring at x = 4.2.
ggplot(
  df,
  aes(xmin = 3, xmax = 4, ymin = ymin, ymax = ymax, fill = category)
) +
  geom_rect() +
  geom_text(aes(x = 4.2, y = label_pos, label = label), size = 4) +
  xlim(2, 4.5) +
  coord_polar(theta = "y") +
  theme_void()
# Example data: three categories and their counts.
data <- data.frame(
  category = c("A", "B", "C"),
  count = c(10, 60, 30)
)

# Share of the total for each category
data$fraction <- data$count / sum(data$count)
# Upper edge of each rectangle: running total of the shares
data$ymax <- cumsum(data$fraction)
# Lower edge of each rectangle: the previous upper edge, starting from 0
data$ymin <- c(0, data$ymax[-length(data$ymax)])
# Label position: halfway between the two edges
data$labelPosition <- (data$ymin + data$ymax) / 2
# Two-line label: the category name, then its count
data$label <- paste0(data$category, "\n value: ", data$count)
# Draw the ring as rectangles between x = 3 and x = 4 and put a boxed label
# at x = 3.5, the middle of the ring, for every slice. The legend is hidden.
ggplot(
  data,
  aes(xmin = 3, xmax = 4, ymin = ymin, ymax = ymax, fill = category)
) +
  geom_rect() +
  geom_label(aes(y = labelPosition, label = label), x = 3.5, size = 6) +
  scale_fill_brewer(palette = 4) +
  xlim(c(2, 4)) +
  coord_polar(theta = "y") +
  theme_void() +
  theme(legend.position = "none")
# Install sunburstR only when it is not already available, so re-running or
# knitting this document does not reinstall the package every time.
if (!requireNamespace("sunburstR", quietly = TRUE)) {
  install.packages("sunburstR")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
library(sunburstR)
library(dplyr)
# One row per path through the hierarchy, written as "parent-child", together
# with the value for that path.
sunburst_data <- data.frame(
  sequence = c(
    "Fruit-Apples", "Fruit-Bananas", "Fruit-Oranges",
    "Vegetable-Carrots", "Vegetable-Broccoli", "Vegetable-Peppers"
  ),
  value = c(30, 20, 25, 15, 18, 12)
)

# Draw the sunburst chart from the sequence/value table.
sunburst(data = sunburst_data)
Think of this as a nested donut plot that can show a category within a category. They are not ideal when:
library(treemap)
library(dplyr)
# Two-level hierarchy: three subcategories under each of two categories.
sales_data <- data.frame(
  category = rep(c("Fruit", "Vegetable"), each = 3),
  subcategory = c(
    "Apples", "Bananas", "Oranges",
    "Carrots", "Broccoli", "Peppers"
  ),
  value = c(30, 20, 25, 15, 18, 12)
)

# Rectangles are nested by category then subcategory and sized by `value`.
treemap(
  dtf = sales_data,
  index = c("category", "subcategory"),
  vSize = "value",
  title = "Treemap of Category and Subcategory Counts"
)
They can become hard to read if:
Circular packing visualizes nested hierarchical structures.
Large circles represent categories, while smaller circles inside represent subcategories.
This is useful for:
library(packcircles)
library(ggforce)
# One size per circle; the layout result supplies the x, y and radius
# columns that are mapped to the circles below.
sizes <- c(50, 30, 20, 10, 5)
circle_data <- circleProgressiveLayout(sizes)

library(ggplot2)

# Draw every circle with the same fill and outline on an equal-aspect canvas.
ggplot(data = circle_data, mapping = aes(x0 = x, y0 = y, r = radius)) +
  geom_circle(fill = "skyblue", colour = "black") +
  coord_equal() +
  theme_void()
Explanation
library(ggraph)
library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ lubridate 1.9.5 ✔ tibble 3.3.1
## ✔ purrr 1.2.1 ✔ tidyr 1.3.2
## ✔ readr 2.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%--%() masks igraph::%--%()
## ✖ tibble::as_data_frame() masks igraph::as_data_frame(), dplyr::as_data_frame()
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ purrr::compose() masks igraph::compose()
## ✖ tidyr::crossing() masks igraph::crossing()
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::simplify() masks igraph::simplify()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(viridis)
## Loading required package: viridisLite
##
## Attaching package: 'viridis'
##
## The following object is masked from 'package:scales':
##
## viridis_pal
# A hierarchy is described by two data frames; here both are taken from the
# flare dataset: the vertices (nodes) and the edges that connect them.
vertices <- flare$vertices
edges <- flare$edges
mygraph <- graph_from_data_frame(d = edges, vertices = vertices)

# Circle-packing layout; `weight = size` sizes each circle by the `size`
# column of the vertices data frame.
ggraph(graph = mygraph, layout = "circlepack", weight = size) +
  geom_node_circle() +
  theme_void()
# Same circle packing, with circles filled by their depth in the hierarchy,
# a viridis colour scale, and the legend placed above the plot.
ggraph(graph = mygraph, layout = "circlepack", weight = size) +
  geom_node_circle(mapping = aes(fill = depth)) +
  scale_fill_viridis() +
  theme_void() +
  theme(legend.position = "top")
# Circle packing filled by depth, with a text label on every leaf node.
# - geom_node_text() has no `fill` aesthetic, so it is not mapped here
#   (mapping it only produced an "Ignoring unknown aesthetics" warning).
# - The legend is hidden with the documented value "none".
ggraph(mygraph, layout = "circlepack", weight = size) +
  geom_node_circle(aes(fill = depth)) +
  geom_node_text(aes(label = shortName, filter = leaf, size = size)) +
  theme_void() +
  theme(legend.position = "none") +
  scale_fill_viridis()
## Warning in geom_node_text(aes(label = shortName, filter = leaf, fill = depth, :
## Ignoring unknown aesthetics: fill
Dendrograms visualize hierarchical clustering.
They show how observations group together based on similarity.
Common uses include:
# Load packages
library(ggraph)
library(igraph)
library(tidyverse)
# Step 1: top level of the hierarchy.
# A single root called "origin" is linked to five groups.
level1 <- data.frame(from = "origin", to = paste0("group_", seq_len(5)))

# Step 2: second level of the hierarchy.
# Every group is linked to five subgroups (25 subgroups in total).
level2 <- data.frame(
  from = rep(level1$to, each = 5),
  to = paste0("subgroup_", seq_len(25))
)

# Step 3: stack both levels into a single edge list.
edges <- bind_rows(level1, level2)

# Step 4: build a graph object from the edge list.
graph <- graph_from_data_frame(edges)

# Step 5: draw the hierarchy as a non-circular dendrogram with a point on
# every node.
ggraph(graph = graph, layout = "dendrogram", circular = FALSE) +
  geom_edge_diagonal() +
  geom_node_point() +
  theme_void()
# Adding labels: `filter = leaf` restricts the text to leaf nodes. The labels
# are rotated 90 degrees, right-aligned and nudged slightly downward; the
# lower y limit is extended, presumably to leave room for them.
ggraph(graph = graph, layout = "dendrogram") +
  geom_edge_diagonal() +
  geom_node_text(
    mapping = aes(filter = leaf, label = name),
    hjust = 1,
    angle = 90,
    nudge_y = -0.01
  ) +
  ylim(-0.4, NA)

# Same plot with a semi-transparent point at the end of each branch; the
# labels are nudged further down and the lower y limit is extended further.
ggraph(graph = graph, layout = "dendrogram") +
  geom_edge_diagonal() +
  geom_node_text(
    mapping = aes(filter = leaf, label = name),
    hjust = 1,
    angle = 90,
    nudge_y = -0.04
  ) +
  geom_node_point(mapping = aes(filter = leaf), alpha = 0.6) +
  ylim(-0.5, NA)
Interpretation
# Circular dendrogram drawn with curved (diagonal) edges.
ggraph(graph = graph, layout = "dendrogram", circular = TRUE) +
  geom_edge_diagonal() +
  geom_node_point() +
  theme_void()

# Same layout, with straight links instead of curved edges.
ggraph(graph = graph, layout = "dendrogram", circular = TRUE) +
  geom_edge_link() +
  geom_node_point() +
  theme_void()
# Note: adding labels to circular plots is quite difficult.
Word clouds visualize text frequency.
Words that appear more often are displayed larger.
They are commonly used in:
library(wordcloud2)
# Basic word cloud of the demoFreq data.
wordcloud2(demoFreq, size = 1.6)
# Colour the words with a proposed palette.
wordcloud2(data = demoFreq, size = 1.6, color = "random-dark")
# Or pass a vector of colours; it must be the same length as the input data.
wordcloud2(
  data = demoFreq,
  size = 1.6,
  color = rep(c("green", "blue"), length.out = nrow(demoFreq))
)
# Change the background colour.
wordcloud2(
  data = demoFreq,
  size = 1.6,
  color = "random-light",
  backgroundColor = "black"
)
Explanation
You can customize the word cloud shape using the shape argument. Available shapes are:
# Draw the word cloud in a star shape, at a smaller word size.
wordcloud2(data = demoFreq, shape = "star", size = 0.7)
Use the mpg dataset to create a table counting the number of vehicles in each class. Then create a pie chart showing the distribution of vehicle classes.
Create a simple pie chart in base R using the following values: 3, 7, 9, 1, 2
Label the slices: Gr-A, Gr-B, Gr-C, Gr-D, Gr-E
Requirements:
library(ggplot2)
library(dplyr)
# Count the vehicles in each class of the mpg data.
data("mpg")
class_counts <- count(mpg, class)

# Pie chart: one stacked column wrapped into polar coordinates, one slice
# per class.
ggplot(data = class_counts, mapping = aes(x = "", y = n, fill = class)) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  labs(title = "Vehicle Class Distribution") +
  theme_void()
# Slice sizes and the matching slice labels, in the same order.
values <- c(3, 7, 9, 1, 2)
labels <- c("Gr-A", "Gr-B", "Gr-C", "Gr-D", "Gr-E")

# Base R pie chart with a title.
pie(x = values, labels = labels, main = "Simple Pie Chart")
Questions
SUV
slices get tiny and crowded making it hard to compare sizes
Create a donut chart using this small dataset:
# Three categories with their counts.
data <- data.frame(category = c("A", "B", "C"), count = c(10, 60, 30))
Add labels that show either:
library(ggplot2)
library(dplyr)
library(scales)
# Build the data and derive the slice geometry in one pipeline:
# share of the total, slice end, slice start, label position (midpoint),
# and a label combining category, value and percentage.
df <- data.frame(
  category = c("A", "B", "C"),
  value = c(10, 60, 30)
) %>%
  mutate(
    prop = value / sum(value),
    ymax = cumsum(prop),
    ymin = lag(ymax, default = 0),
    label_pos = (ymin + ymax) / 2,
    label = paste0(category, " (", value, ") - ", percent(prop))
  )

# Ring drawn as rectangles between x = 3 and x = 4, with each label placed
# at x = 3.5, the middle of the ring.
ggplot(
  df,
  aes(xmin = 3, xmax = 4, ymin = ymin, ymax = ymax, fill = category)
) +
  geom_rect() +
  geom_text(aes(x = 3.5, y = label_pos, label = label), size = 4) +
  xlim(2, 4.5) +
  coord_polar(theta = "y") +
  labs(title = "Donut Chart") +
  theme_void()
Questions
So that the text appears in the middle of each slice.
Category + value is easier to read because it gives more information.
Create the following dataset:
# One row per "parent-child" path, with the value for that path.
sunburst_data <- data.frame(
  sequence = c(
    "Fruit-Apples", "Fruit-Bananas", "Fruit-Oranges",
    "Vegetable-Carrots", "Vegetable-Broccoli", "Vegetable-Peppers"
  ),
  value = c(30, 20, 25, 15, 18, 12)
)
Use the sunburstR package to create a sunburst plot.
# Install sunburstR only when it is not already available, so re-running or
# knitting this document does not reinstall the package every time.
if (!requireNamespace("sunburstR", quietly = TRUE)) {
  install.packages("sunburstR")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
library(sunburstR)
# Paths through the hierarchy ("parent-child") and the value of each path.
sunburst_data <- data.frame(
  sequence = c(
    "Fruit-Apples", "Fruit-Bananas", "Fruit-Oranges",
    "Vegetable-Carrots", "Vegetable-Broccoli", "Vegetable-Peppers"
  ),
  value = c(30, 20, 25, 15, 18, 12)
)

# Draw the sunburst chart.
sunburst(data = sunburst_data)
Using this dataset:
# Two categories, three subcategories each, with a value per subcategory.
sales_data <- data.frame(
  category = rep(c("Fruit", "Vegetable"), each = 3),
  subcategory = c(
    "Apples", "Bananas", "Oranges",
    "Carrots", "Broccoli", "Peppers"
  ),
  value = c(30, 20, 25, 15, 18, 12)
)
Create a treemap.
Requirements
# Install treemap only when it is not already available, so re-running or
# knitting this document does not reinstall the package every time.
if (!requireNamespace("treemap", quietly = TRUE)) {
  install.packages("treemap")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
library(treemap)
# Category / subcategory table with a value for every subcategory.
sales_data <- data.frame(
  category = rep(c("Fruit", "Vegetable"), each = 3),
  subcategory = c(
    "Apples", "Bananas", "Oranges",
    "Carrots", "Broccoli", "Peppers"
  ),
  value = c(30, 20, 25, 15, 18, 12)
)

# Treemap nested by category then subcategory, with areas sized by `value`.
treemap(
  dtf = sales_data,
  index = c("category", "subcategory"),
  vSize = "value",
  title = "Sales Treemap"
)
They both show hierarchical data and use size to represent values.
A treemap uses rectangles; a sunburst chart uses circles and layers.
treemap because it is simpler and labels are clearer.
Create a simple hierarchy with:
Then convert it into a graph and plot it as a dendrogram. Add labels and points to the ends of the branches.
library(ggraph)
library(igraph)
library(tidyverse)
# Hierarchy: one root ("Origin") linked to four groups, and every group
# linked to three subgroups (twelve in total).
level1 <- data.frame(from = "Origin", to = paste0("Group", seq_len(4)))
level2 <- data.frame(
  from = rep(level1$to, each = 3),
  to = paste0("Sub", seq_len(12))
)

# Stack both levels into one edge list and turn it into a graph.
edges <- bind_rows(level1, level2)
graph <- graph_from_data_frame(edges)

# Dendrogram with a rotated label and a semi-transparent point on each leaf.
ggraph(graph = graph, layout = "dendrogram") +
  geom_edge_diagonal() +
  geom_node_text(
    mapping = aes(filter = leaf, label = name),
    hjust = 1,
    angle = 90,
    nudge_y = -0.04
  ) +
  geom_node_point(mapping = aes(filter = leaf), alpha = 0.6) +
  ylim(-0.5, NA) +
  theme_void()
Run the following code to create a word-based dataset from the text of Jane Austen's novels (provided by the janeaustenr package). Annotate the lines to ensure you know how it was built. This uses real text and produces meaningful words.
library(janeaustenr)
library(tidytext)
library(dplyr)
library(wordcloud2)
# Text returned by austen_books().
text <- austen_books()

# Split the `text` column into a `word` column, then count how often each
# word occurs, most frequent first.
word_counts <- text %>%
  unnest_tokens(word, text) %>%
  count(word, sort = TRUE)

data("stop_words")

# Pay special attention to this. This is important for word clouds.
# Drop every word that also appears in stop_words. The join column is named
# explicitly so the result does not depend on which column names the two
# tables happen to share, and no "Joining with `by = ...`" message is printed.
word_counts <- word_counts %>%
  anti_join(stop_words, by = "word")
## Joining with `by = join_by(word)`
Create a word cloud of the most commonly used words in the text.
library(janeaustenr) # load text data
library(tidytext) # for splitting text into words
library(dplyr) # for data manipulation
library(wordcloud2) # for word cloud
text <- austen_books() # get text data

word_counts <- text %>%
  unnest_tokens(word, text) %>% # split text into individual words
  count(word, sort = TRUE) # count how often each word appears

data("stop_words") # common words like "the", "and"

# Remove common filler words. The join column is named explicitly so the
# result does not depend on which column names the two tables happen to
# share, and no "Joining with `by = ...`" message is printed.
word_counts <- word_counts %>%
  anti_join(stop_words, by = "word")
## Joining with `by = join_by(word)`
# Make a word cloud from the top 100 words. head() returns at most the first
# 100 rows, so no all-NA rows are passed on if fewer than 100 words remain
# (indexing with [1:100, ] would pad the result with NA rows in that case).
wordcloud2(head(word_counts, 100))