Use the mpg dataset to create a table counting the number of vehicles in each class. Then create a pie chart showing the distribution of vehicle classes.
Create a simple pie chart in base R using the following values: 3, 7, 9, 1, 2
Label the slices: Gr-A, Gr-B, Gr-C, Gr-D, Gr-E
Requirements:
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the mpg dataset.
data(mpg)
# Tally the vehicles per class; count() stores the tally in column `n`.
count_dat <- count(mpg, class)
# head(count_dat)
# Pie chart 1 (ggplot2, not base R): one stacked bar bent into a circle by
# mapping the y axis onto the angle.
ggplot(count_dat, aes(x = "", y = n, fill = class)) +
  geom_col() +
  coord_polar(theta = "y") +
  theme_void() +
  ggtitle("Vehicle Class Distribution")
# Pie chart 2 (base R graphics): five slices labelled Gr-A through Gr-E.
dat1 <- c(3, 7, 9, 1, 2)
labs1 <- paste0("Gr-", LETTERS[1:5])
pie(x = dat1, labels = labs1)
Questions
SUV. It's difficult to distinguish counts from pie slices. Create a donut chart using this small dataset:
# Three categories and their counts; later chunks plot this same table.
data2 <- data.frame(
  category = c("A", "B", "C"),
  count = c(10, 60, 30)
)
# Base R pie chart: slice sizes from `count`, slice labels from `category`.
with(data2, pie(count, labels = category))
# Donut chart (ggplot2): the bar sits at x = 2 while the x axis starts at 0.5,
# so the unused inner range becomes the hole once the plot is made polar.
library(ggplot2)
library(dplyr)
ggplot(data2, aes(x = 2, y = count, fill = category)) +
  geom_col() +
  coord_polar(theta = "y") +
  xlim(0.5, 2.5) +
  theme_void() +
  ggtitle("Donut Pie Chart")
Add labels that show either:
library(dplyr)
library(scales)
library(ggplot2)
# Slice geometry: each category spans [ymin, ymax] on a 0-1 scale, and its
# label sits at the midpoint of that span.
data2_new <- mutate(
  data2,
  prop = count / sum(count),    # share of the total
  ymax = cumsum(prop),          # upper edge of each slice
  ymin = c(0, head(ymax, -1)),  # lower edge = previous slice's upper edge
  label_pos = (ymin + ymax) / 2,
  label = percent(prop)         # share formatted as a percentage string
)
# Donut chart with percentage labels: rectangles between x = 3 and x = 4 form
# the ring; starting the x axis at 2 leaves the hole in the middle.
ggplot(
  data2_new,
  aes(ymax = ymax, ymin = ymin, xmax = 4, xmin = 3, fill = category)
) +
  geom_rect() +
  coord_polar(theta = "y") +
  # Percentage label centred on the ring (x = 3.5) at each slice midpoint.
  geom_text(aes(x = 3.5, y = label_pos, label = label), size = 4) +
  theme_void() +
  xlim(2, 4.5) +
  ggtitle("Donut Pie Chart With Percentages")
Questions
If we didn't, the labels would not be seen. It depends on the data: category and value could provide more information, but percentages are simpler. Create the following dataset:
# Each row is one "parent-child" path plus the value attached to that leaf.
sunburst_data <- data.frame(
  sequence = c(
    "Fruit-Apples", "Fruit-Bananas", "Fruit-Oranges",
    "Vegetable-Carrots", "Vegetable-Broccoli", "Vegetable-Peppers"
  ),
  value = c(30, 20, 25, 15, 18, 12)
)
Use the sunburstR package to create a sunburst plot.
# install.packages("sunburstR")
library(sunburstR)
## Warning: package 'sunburstR' was built under R version 4.5.3
# Render the sunburst from the sequence/value table.
sunburst(sunburst_data)
Using this dataset:
# Two-level hierarchy: three subcategories under each of the two categories,
# with one value per subcategory.
sales_data <- data.frame(
  category = rep(c("Fruit", "Vegetable"), each = 3),
  subcategory = c(
    "Apples", "Bananas", "Oranges",
    "Carrots", "Broccoli", "Peppers"
  ),
  value = c(30, 20, 25, 15, 18, 12)
)
Create a treemap.
Requirements
# install.packages("treemap")
library(treemap)
## Warning: package 'treemap' was built under R version 4.5.3
library(dplyr)
library(ggplot2)
# Treemap nested by category, then subcategory; rectangles are sized by the
# `value` column.
treemap(
  sales_data,
  index = c("category", "subcategory"),
  vSize = "value",
  title = "Tree Map Fruits and Veggies"
)
They both display categories and subcategories of data, with area being the marker for frequency or counts of the variables. No live interaction. Treemaps. Create a simple hierarchy with:
Then convert it into a graph and plot it as a dendrogram. Add labels and points to the ends of the branches.
# library
#install.packages("ggraph")
library(ggraph)
## Warning: package 'ggraph' was built under R version 4.5.3
library(igraph)
## Warning: package 'igraph' was built under R version 4.5.3
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ lubridate 1.9.4 ✔ tibble 3.3.0
## ✔ purrr 1.2.0 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%--%() masks igraph::%--%()
## ✖ tibble::as_data_frame() masks igraph::as_data_frame(), dplyr::as_data_frame()
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ purrr::compose() masks igraph::compose()
## ✖ tidyr::crossing() masks igraph::crossing()
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::simplify() masks igraph::simplify()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
# Level 1: the root node "origin" branches into four groups.
level1 <- data.frame(
  from = "origin",
  to = paste0("group", 1:4)
)
# Level 2: every group branches into three subgroups. rep(..., each = 3)
# repeats each group name three times in a row, so subgroup1-3 hang off
# group1, subgroup4-6 off group2, and so on.
level2 <- data.frame(
  from = rep(level1$to, each = 3),
  to = paste0("subgroup", 1:12)
)
# Stack both levels into a single edge list (one row per parent -> child link).
edges <- bind_rows(level1, level2)
# View(edges)
# Turn the edge list into a graph object that ggraph can lay out.
graph1 <- graph_from_data_frame(edges)
# View(graph1)
# Plot as a dendrogram with a point and a text label at the end of each
# branch; `filter = leaf` restricts both layers to the leaf nodes.
ggraph(graph1, layout = "dendrogram", circular = FALSE) +
  geom_edge_diagonal() +
  geom_node_point(aes(filter = leaf)) +
  geom_node_text(
    aes(label = name, filter = leaf),
    angle = 90, hjust = 1, nudge_y = -0.05
  ) +
  # Extend the y axis below the leaves so the rotated labels are not clipped.
  expand_limits(y = -0.6) +
  theme_void() +
  labs(title = "Dendrogram")
Run the following code to create a word-based dataset from the text of Jane Austen's novels (provided by the janeaustenr package). Annotate the lines to ensure you know how it was built. This uses real text and produces meaningful words.
library(janeaustenr)
## Warning: package 'janeaustenr' was built under R version 4.5.3
library(tidytext)
## Warning: package 'tidytext' was built under R version 4.5.3
library(dplyr)
library(wordcloud2)
## Warning: package 'wordcloud2' was built under R version 4.5.3
# Data frame of Jane Austen book text supplied by the janeaustenr package.
text <- austen_books()
# Break the `text` column into one word per row (stored in a new `word`
# column), then count how often each word occurs, most frequent first.
word_counts <- text %>%
  unnest_tokens(word, text) %>%
  count(word, sort = TRUE)
# Load the stop-word table (very common words such as "and").
data("stop_words")
# Pay special attention to this. This is important for word clouds: without
# this step the most common words would all be stop words.
# anti_join() keeps only the rows of word_counts whose `word` has no match in
# stop_words; naming the key explicitly avoids relying on a guessed join column.
word_counts <- word_counts %>%
  anti_join(stop_words, by = "word")
Create a word cloud of the most commonly used words in the text.
# Draw the word cloud from the word/count table.
wordcloud2(data = word_counts)