install.packages("ggplot2")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
install.packages("dplyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data(diamonds)
glimpse(diamonds)
## Rows: 53,940
## Columns: 10
## $ carat <dbl> 0.23, 0.21, 0.23, 0.29, 0.31, 0.24, 0.24, 0.26, 0.22, 0.23, 0.…
## $ cut <ord> Ideal, Premium, Good, Premium, Good, Very Good, Very Good, Ver…
## $ color <ord> E, E, E, I, J, J, I, H, E, H, J, J, F, J, E, E, I, J, J, J, I,…
## $ clarity <ord> SI2, SI1, VS1, VS2, SI2, VVS2, VVS1, SI1, VS2, VS1, SI1, VS1, …
## $ depth <dbl> 61.5, 59.8, 56.9, 62.4, 63.3, 62.8, 62.3, 61.9, 65.1, 59.4, 64…
## $ table <dbl> 55, 61, 65, 58, 58, 57, 57, 55, 61, 61, 55, 56, 61, 54, 62, 58…
## $ price <int> 326, 326, 327, 334, 335, 336, 336, 337, 337, 338, 339, 340, 34…
## $ x <dbl> 3.95, 3.89, 4.05, 4.20, 4.34, 3.94, 3.95, 4.07, 3.87, 4.00, 4.…
## $ y <dbl> 3.98, 3.84, 4.07, 4.23, 4.35, 3.96, 3.98, 4.11, 3.78, 4.05, 4.…
## $ z <dbl> 2.43, 2.31, 2.31, 2.63, 2.75, 2.48, 2.47, 2.53, 2.49, 2.39, 2.…
ggplot(mtcars,
aes(x = wt, y = mpg, size = hp)) +
geom_point(alpha = 0.7, color = "steelblue") +
scale_size(range = c(3, 10)) +
theme_minimal() +
labs(
title = "Fuel Efficiency vs Vehicle Weight",
subtitle = "Bubble size represents horsepower",
x = "Weight (1000 lbs)",
y = "Miles per Gallon",
size = "Horsepower"
)
Compare diamond price across cut quality.
ggplot(diamonds, aes(x = cut, y = price)) +
geom_boxplot()
#improve readability by rotating axis text
ggplot(diamonds, aes(x = cut, y = price)) +
geom_boxplot(fill = "steelblue") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
#add transparency
ggplot(diamonds, aes(x = cut, y = price)) +
geom_boxplot(fill = "steelblue", alpha=0.6) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplot(diamonds, aes(x = cut, y = price)) +
geom_violin(fill = "skyblue", alpha = 0.6)
# Violin with boxplot overlay
ggplot(diamonds, aes(x = cut, y = price)) +
geom_violin(fill = "lightblue", alpha = 0.4, trim = FALSE) +
geom_boxplot(width = 0.1, alpha = 0.7)
ggplot(diamonds, aes(x = cut, y = price)) +
geom_violin(fill = "lightblue", alpha = 0.4, trim = FALSE) +
geom_boxplot(width = 0.1, alpha = 0.7, outlier.size = 0.5, outlier.colour = "red")
ggplot(mpg, aes(x = class, y = hwy)) +
geom_boxplot(outlier.shape = NA) + # hide default outliers
geom_jitter(width = 0.2, height = 0, alpha = 0.6, color = "red", size = 2) +
theme_minimal() +
labs(
title = "Highway MPG by Vehicle Class with Jittered Outliers",
x = "Vehicle Class",
y = "Highway MPG"
)
library(dplyr)
mpg_outliers <- mpg %>%
group_by(class) %>%
mutate(q1 = quantile(hwy, 0.25),
q3 = quantile(hwy, 0.75),
iqr = q3 - q1,
is_outlier = hwy < (q1 - 1.5*iqr) | hwy > (q3 + 1.5*iqr)) %>%
filter(is_outlier)
# Plot
ggplot(mpg, aes(x = class, y = hwy)) +
geom_boxplot() + #not this is keeping the outliers already.
geom_jitter(data = mpg_outliers, width = 0.2, color = "red", size = 2, alpha = 0.8) +
theme_minimal()
ggplot(mpg, aes(x = class, y = hwy)) +
geom_boxplot(outlier.shape = NA) + # hide default outliers
geom_jitter(data = mpg_outliers, width = 0.2, color = "red", size = 2, alpha = 0.8) +
theme_minimal()
#Rounding axis labels
ggplot(diamonds, aes(x = cut, y = price)) +
geom_boxplot() +
scale_y_continuous(labels = scales::label_number(accuracy = 100))
ggplot(diamonds, aes(x = cut, y = price)) +
geom_boxplot() +
coord_cartesian(ylim = c(0, 5000))
ggplot(diamonds, aes(x = cut, y = price)) +
geom_boxplot() +
scale_y_continuous(limits=c(0, 5000))
## Warning: Removed 14714 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
scale_y_continuous(limits = …) → removes data outside range coord_cartesian() → zooms without removing data
ggplot(diamonds, aes(x = cut, y = price, fill = cut)) +
geom_boxplot() +
scale_fill_brewer(palette = "Set2")
#Viridis is colorblind friendly
ggplot(diamonds, aes(x = cut, y = price, fill = cut)) +
geom_boxplot() +
scale_fill_viridis_d() #_d = discrete
ggplot(diamonds, aes(x = cut, y = price)) +
geom_boxplot(fill = "steelblue") +
labs(
title = "Diamond Prices by Cut Quality",
subtitle = "Boxplots show distribution of price across cut categories",
x = "Cut Quality",
y = "Price (USD)",
caption = "Source: ggplot2 diamonds dataset"
)
ggplot(diamonds, aes(x = cut, y = price)) +
geom_boxplot(fill = "steelblue") +
labs(
title = "Diamond Prices by Cut Quality",
subtitle = "Boxplots show distribution of price across cut categories",
x = "Cut Quality",
y = "Price (USD)",
caption = "Source: ggplot2 diamonds dataset"
) + theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 12),
plot.caption = element_text(size = 8, hjust = 1)
)
# Discuss the ordering of themes!
ggplot(diamonds, aes(x = cut, y = price)) +
geom_boxplot(fill = "steelblue") +
labs(
title = "Diamond Prices by Cut Quality",
subtitle = "Boxplots show distribution of price across cut categories",
x = "Cut Quality",
y = "Price (USD)",
caption = "Source: ggplot2 diamonds dataset"
) + theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 12),
plot.caption = element_text(size = 8, hjust = 1)
) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(axis.text = element_text(size = 12)) +
theme_minimal () ########## ORDER MATTERS
ggplot(diamonds, aes(x = cut, y = price)) +
geom_boxplot(fill = "steelblue") +
labs(
title = "Diamond Prices by Cut Quality",
subtitle = "Boxplots show distribution of price across cut categories",
x = "Cut Quality",
y = "Price (USD)",
caption = "Source: ggplot2 diamonds dataset"
) + theme_minimal () +########## ORDER MATTERS
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 12),
plot.caption = element_text(size = 8, hjust = 1)
) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(axis.text = element_text(size = 12))
In plain English:
Group the data by cut
Compute the median price within each cut
Reorder the cut categories from lowest median price to highest
Use that new order on the x-axis
#What if you dont link the order that it has set by default?
ggplot(diamonds, aes(x = reorder(cut, price, median), y = price)) + #“Reorder the levels of cut based on the median value of price within each cut.”
geom_boxplot(fill = "steelblue") +
coord_cartesian(ylim = c(0, 5000))
ggplot(diamonds, aes(x = reorder(cut, -price, median), y = price)) + #“Reorder the levels of cut based on the median value of price within each cut.”
geom_boxplot(fill = "steelblue") +
coord_cartesian(ylim = c(0, 5000))
#what if we add "decr" to reverse the order?
#⚠ desc() does not work inside base reorder().
ggplot(diamonds, aes(x = cut, y = price, fill = cut)) +
geom_violin(alpha = 0.4, trim = FALSE) +
geom_boxplot(width = 0.1, alpha = 0.7) +
scale_fill_viridis_d() +
coord_cartesian(ylim = c(0, 12000)) +
labs(
title = "Distribution of Diamond Prices by Cut",
subtitle = "Violin + Boxplot Comparison",
x = "Cut Quality",
y = "Price (USD)",
caption = "Source: ggplot2 diamonds dataset"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(face = "bold")
)
mtcars$cyl <- as.factor(mtcars$cyl)
mtcars$gear <- as.factor(mtcars$gear)
ggplot(mtcars, aes(x = cyl)) +
geom_bar()
#geom_bar() by default uses stat = "count"
#It counts observations in each category
#No y variable needed
library(dplyr)
#If you want more than counts, you will need a summary table first
avg_mpg <- mtcars %>%
group_by(cyl) %>%
summarise(mean_mpg = mean(mpg))
ggplot(avg_mpg, aes(x = cyl, y = mean_mpg)) +
geom_col()
#geom_bar() → counts
#geom_col() → uses provided y values
# This distinction confuses students constantly.
#stacked barplots
ggplot(mtcars, aes(x = cyl, fill = gear)) +
geom_bar()
#What happens:
#Bars represent total count of cars per cylinder group
#Segments show distribution of gear types
#⚠ Hard to compare middle segments
#⚠ Hard to compare across bars
#⚠ Order of stacking affects readability
# Add proportions instead!
ggplot(mtcars, aes(x = cyl, fill = gear)) +
geom_bar(position = "fill")
#Each bar = 100%
#Shows composition, not count
#grouped barplots
ggplot(mtcars, aes(x = cyl, y = mpg, fill = gear)) +
stat_summary(fun = mean,
geom = "col",
position = "dodge")
#stat_summary() calculates summary inside ggplot
#Compare with pre-summarized approach
#The additional of error bars
ggplot(mtcars, aes(x = cyl, y = mpg, fill = gear)) +
stat_summary(fun = mean,
geom = "col",
position = position_dodge(width = 0.9)) +
stat_summary(fun.data = mean_se,
geom = "errorbar",
position = position_dodge(width = 0.9),
width = 0.2)
ggplot(mpg, aes(x = class, y = hwy, fill = drv)) +
geom_boxplot()+
labs(
title = "Highway MPG by Vehicle Class",
subtitle = "Distrubution of highway fuel efficeiency across vehicle classes",
caption = "Source: ggplot mpg dataset",
x = "Vehicle Class",
y = "Highway MPG"
)
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## <theme> List of 144
## $ line : <ggplot2::element_line>
## ..@ colour : chr "black"
## ..@ linewidth : num 0.5
## ..@ linetype : num 1
## ..@ lineend : chr "butt"
## ..@ linejoin : chr "round"
## ..@ arrow : logi FALSE
## ..@ arrow.fill : chr "black"
## ..@ inherit.blank: logi TRUE
## $ rect : <ggplot2::element_rect>
## ..@ fill : chr "white"
## ..@ colour : chr "black"
## ..@ linewidth : num 0.5
## ..@ linetype : num 1
## ..@ linejoin : chr "round"
## ..@ inherit.blank: logi TRUE
## $ text : <ggplot2::element_text>
## ..@ family : chr ""
## ..@ face : chr "plain"
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : chr "black"
## ..@ size : num 11
## ..@ hjust : num 0.5
## ..@ vjust : num 0.5
## ..@ angle : num 0
## ..@ lineheight : num 0.9
## ..@ margin : <ggplot2::margin> num [1:4] 0 0 0 0
## ..@ debug : logi FALSE
## ..@ inherit.blank: logi TRUE
## $ title : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : NULL
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ point : <ggplot2::element_point>
## ..@ colour : chr "black"
## ..@ shape : num 19
## ..@ size : num 1.5
## ..@ fill : chr "white"
## ..@ stroke : num 0.5
## ..@ inherit.blank: logi TRUE
## $ polygon : <ggplot2::element_polygon>
## ..@ fill : chr "white"
## ..@ colour : chr "black"
## ..@ linewidth : num 0.5
## ..@ linetype : num 1
## ..@ linejoin : chr "round"
## ..@ inherit.blank: logi TRUE
## $ geom : <ggplot2::element_geom>
## ..@ ink : chr "black"
## ..@ paper : chr "white"
## ..@ accent : chr "#3366FF"
## ..@ linewidth : num 0.5
## ..@ borderwidth: num 0.5
## ..@ linetype : int 1
## ..@ bordertype : int 1
## ..@ family : chr ""
## ..@ fontsize : num 3.87
## ..@ pointsize : num 1.5
## ..@ pointshape : num 19
## ..@ colour : NULL
## ..@ fill : NULL
## $ spacing : 'simpleUnit' num 5.5points
## ..- attr(*, "unit")= int 8
## $ margins : <ggplot2::margin> num [1:4] 5.5 5.5 5.5 5.5
## $ aspect.ratio : NULL
## $ axis.title : NULL
## $ axis.title.x : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : num 1
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 2.75 0 0 0
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.title.x.top : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : num 0
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 0 2.75 0
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.title.x.bottom : NULL
## $ axis.title.y : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : num 1
## ..@ angle : num 90
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 2.75 0 0
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.title.y.left : NULL
## $ axis.title.y.right : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : num 1
## ..@ angle : num -90
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 0 0 2.75
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.text : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : chr "#4D4D4DFF"
## ..@ size : 'rel' num 0.8
## ..@ hjust : NULL
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : NULL
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.text.x : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : num 1
## ..@ vjust : num 1
## ..@ angle : num 45
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 2.2 0 0 0
## ..@ debug : NULL
## ..@ inherit.blank: logi FALSE
## $ axis.text.x.top : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 0 4.95 0
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.text.x.bottom : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 4.95 0 0 0
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.text.y : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : num 1
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 2.2 0 0
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.text.y.left : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 4.95 0 0
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.text.y.right : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 0 0 4.95
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.text.theta : NULL
## $ axis.text.r : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : num 0.5
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 2.2 0 2.2
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.ticks : <ggplot2::element_blank>
## $ axis.ticks.x : NULL
## $ axis.ticks.x.top : NULL
## $ axis.ticks.x.bottom : NULL
## $ axis.ticks.y : NULL
## $ axis.ticks.y.left : NULL
## $ axis.ticks.y.right : NULL
## $ axis.ticks.theta : NULL
## $ axis.ticks.r : NULL
## $ axis.minor.ticks.x.top : NULL
## $ axis.minor.ticks.x.bottom : NULL
## $ axis.minor.ticks.y.left : NULL
## $ axis.minor.ticks.y.right : NULL
## $ axis.minor.ticks.theta : NULL
## $ axis.minor.ticks.r : NULL
## $ axis.ticks.length : 'rel' num 0.5
## $ axis.ticks.length.x : NULL
## $ axis.ticks.length.x.top : NULL
## $ axis.ticks.length.x.bottom : NULL
## $ axis.ticks.length.y : NULL
## $ axis.ticks.length.y.left : NULL
## $ axis.ticks.length.y.right : NULL
## $ axis.ticks.length.theta : NULL
## $ axis.ticks.length.r : NULL
## $ axis.minor.ticks.length : 'rel' num 0.75
## $ axis.minor.ticks.length.x : NULL
## $ axis.minor.ticks.length.x.top : NULL
## $ axis.minor.ticks.length.x.bottom: NULL
## $ axis.minor.ticks.length.y : NULL
## $ axis.minor.ticks.length.y.left : NULL
## $ axis.minor.ticks.length.y.right : NULL
## $ axis.minor.ticks.length.theta : NULL
## $ axis.minor.ticks.length.r : NULL
## $ axis.line : <ggplot2::element_blank>
## $ axis.line.x : NULL
## $ axis.line.x.top : NULL
## $ axis.line.x.bottom : NULL
## $ axis.line.y : NULL
## $ axis.line.y.left : NULL
## $ axis.line.y.right : NULL
## $ axis.line.theta : NULL
## $ axis.line.r : NULL
## $ legend.background : <ggplot2::element_blank>
## $ legend.margin : NULL
## $ legend.spacing : 'rel' num 2
## $ legend.spacing.x : NULL
## $ legend.spacing.y : NULL
## $ legend.key : <ggplot2::element_blank>
## $ legend.key.size : 'simpleUnit' num 1.2lines
## ..- attr(*, "unit")= int 3
## $ legend.key.height : NULL
## $ legend.key.width : NULL
## $ legend.key.spacing : NULL
## $ legend.key.spacing.x : NULL
## $ legend.key.spacing.y : NULL
## $ legend.key.justification : NULL
## $ legend.frame : NULL
## $ legend.ticks : NULL
## $ legend.ticks.length : 'rel' num 0.2
## $ legend.axis.line : NULL
## $ legend.text : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : 'rel' num 0.8
## ..@ hjust : NULL
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : NULL
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ legend.text.position : NULL
## $ legend.title : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : num 0
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : NULL
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ legend.title.position : NULL
## $ legend.position : chr "right"
## $ legend.position.inside : NULL
## $ legend.direction : NULL
## $ legend.byrow : NULL
## $ legend.justification : chr "center"
## $ legend.justification.top : NULL
## $ legend.justification.bottom : NULL
## $ legend.justification.left : NULL
## $ legend.justification.right : NULL
## $ legend.justification.inside : NULL
## [list output truncated]
## @ complete: logi TRUE
## @ validate: logi TRUE
Questions:
1.Which vehicle class has the highest median highway mpg?
The subcompact vehicle class has the highest median highway mpg.
The subcompact also has the largest variability because it shows the largest spread.
Create an alteration of this plot where the outliers are shown in a way in which they do not overlap (consider jittering and coloring).
boxplot_outliers <- mpg %>%
group_by(class) %>%
mutate(q1 = quantile(hwy, 0.25),
q3 = quantile(hwy, 0.75),
iqr = q3-q1,
is_outlier = hwy < (q1 - 1.5*iqr) | hwy > (q3 + 1.5*iqr)) %>%
filter(is_outlier)
ggplot(mpg, aes(x = class, y = hwy, fill = drv)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(data = boxplot_outliers, width = 0.2, color = "Pink", size = 2, alpha = 0.5) +
labs(
title = "Highway MPG by Vehicle Class",
subtitle = "created by Hope Parrish",
caption = "Source: ggplot mpg dataset",
x = "Vehicle Class",
y = "Highway MPG"
)
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## <theme> List of 144
## $ line : <ggplot2::element_line>
## ..@ colour : chr "black"
## ..@ linewidth : num 0.5
## ..@ linetype : num 1
## ..@ lineend : chr "butt"
## ..@ linejoin : chr "round"
## ..@ arrow : logi FALSE
## ..@ arrow.fill : chr "black"
## ..@ inherit.blank: logi TRUE
## $ rect : <ggplot2::element_rect>
## ..@ fill : chr "white"
## ..@ colour : chr "black"
## ..@ linewidth : num 0.5
## ..@ linetype : num 1
## ..@ linejoin : chr "round"
## ..@ inherit.blank: logi TRUE
## $ text : <ggplot2::element_text>
## ..@ family : chr ""
## ..@ face : chr "plain"
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : chr "black"
## ..@ size : num 11
## ..@ hjust : num 0.5
## ..@ vjust : num 0.5
## ..@ angle : num 0
## ..@ lineheight : num 0.9
## ..@ margin : <ggplot2::margin> num [1:4] 0 0 0 0
## ..@ debug : logi FALSE
## ..@ inherit.blank: logi TRUE
## $ title : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : NULL
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ point : <ggplot2::element_point>
## ..@ colour : chr "black"
## ..@ shape : num 19
## ..@ size : num 1.5
## ..@ fill : chr "white"
## ..@ stroke : num 0.5
## ..@ inherit.blank: logi TRUE
## $ polygon : <ggplot2::element_polygon>
## ..@ fill : chr "white"
## ..@ colour : chr "black"
## ..@ linewidth : num 0.5
## ..@ linetype : num 1
## ..@ linejoin : chr "round"
## ..@ inherit.blank: logi TRUE
## $ geom : <ggplot2::element_geom>
## ..@ ink : chr "black"
## ..@ paper : chr "white"
## ..@ accent : chr "#3366FF"
## ..@ linewidth : num 0.5
## ..@ borderwidth: num 0.5
## ..@ linetype : int 1
## ..@ bordertype : int 1
## ..@ family : chr ""
## ..@ fontsize : num 3.87
## ..@ pointsize : num 1.5
## ..@ pointshape : num 19
## ..@ colour : NULL
## ..@ fill : NULL
## $ spacing : 'simpleUnit' num 5.5points
## ..- attr(*, "unit")= int 8
## $ margins : <ggplot2::margin> num [1:4] 5.5 5.5 5.5 5.5
## $ aspect.ratio : NULL
## $ axis.title : NULL
## $ axis.title.x : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : num 1
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 2.75 0 0 0
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.title.x.top : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : num 0
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 0 2.75 0
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.title.x.bottom : NULL
## $ axis.title.y : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : num 1
## ..@ angle : num 90
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 2.75 0 0
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.title.y.left : NULL
## $ axis.title.y.right : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : num 1
## ..@ angle : num -90
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 0 0 2.75
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.text : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : chr "#4D4D4DFF"
## ..@ size : 'rel' num 0.8
## ..@ hjust : NULL
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : NULL
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.text.x : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : num 1
## ..@ vjust : num 1
## ..@ angle : num 45
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 2.2 0 0 0
## ..@ debug : NULL
## ..@ inherit.blank: logi FALSE
## $ axis.text.x.top : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 0 4.95 0
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.text.x.bottom : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 4.95 0 0 0
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.text.y : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : num 1
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 2.2 0 0
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.text.y.left : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 4.95 0 0
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.text.y.right : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 0 0 4.95
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.text.theta : NULL
## $ axis.text.r : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : num 0.5
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 2.2 0 2.2
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.ticks : <ggplot2::element_blank>
## $ axis.ticks.x : NULL
## $ axis.ticks.x.top : NULL
## $ axis.ticks.x.bottom : NULL
## $ axis.ticks.y : NULL
## $ axis.ticks.y.left : NULL
## $ axis.ticks.y.right : NULL
## $ axis.ticks.theta : NULL
## $ axis.ticks.r : NULL
## $ axis.minor.ticks.x.top : NULL
## $ axis.minor.ticks.x.bottom : NULL
## $ axis.minor.ticks.y.left : NULL
## $ axis.minor.ticks.y.right : NULL
## $ axis.minor.ticks.theta : NULL
## $ axis.minor.ticks.r : NULL
## $ axis.ticks.length : 'rel' num 0.5
## $ axis.ticks.length.x : NULL
## $ axis.ticks.length.x.top : NULL
## $ axis.ticks.length.x.bottom : NULL
## $ axis.ticks.length.y : NULL
## $ axis.ticks.length.y.left : NULL
## $ axis.ticks.length.y.right : NULL
## $ axis.ticks.length.theta : NULL
## $ axis.ticks.length.r : NULL
## $ axis.minor.ticks.length : 'rel' num 0.75
## $ axis.minor.ticks.length.x : NULL
## $ axis.minor.ticks.length.x.top : NULL
## $ axis.minor.ticks.length.x.bottom: NULL
## $ axis.minor.ticks.length.y : NULL
## $ axis.minor.ticks.length.y.left : NULL
## $ axis.minor.ticks.length.y.right : NULL
## $ axis.minor.ticks.length.theta : NULL
## $ axis.minor.ticks.length.r : NULL
## $ axis.line : <ggplot2::element_blank>
## $ axis.line.x : NULL
## $ axis.line.x.top : NULL
## $ axis.line.x.bottom : NULL
## $ axis.line.y : NULL
## $ axis.line.y.left : NULL
## $ axis.line.y.right : NULL
## $ axis.line.theta : NULL
## $ axis.line.r : NULL
## $ legend.background : <ggplot2::element_blank>
## $ legend.margin : NULL
## $ legend.spacing : 'rel' num 2
## $ legend.spacing.x : NULL
## $ legend.spacing.y : NULL
## $ legend.key : <ggplot2::element_blank>
## $ legend.key.size : 'simpleUnit' num 1.2lines
## ..- attr(*, "unit")= int 3
## $ legend.key.height : NULL
## $ legend.key.width : NULL
## $ legend.key.spacing : NULL
## $ legend.key.spacing.x : NULL
## $ legend.key.spacing.y : NULL
## $ legend.key.justification : NULL
## $ legend.frame : NULL
## $ legend.ticks : NULL
## $ legend.ticks.length : 'rel' num 0.2
## $ legend.axis.line : NULL
## $ legend.text : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : 'rel' num 0.8
## ..@ hjust : NULL
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : NULL
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ legend.text.position : NULL
## $ legend.title : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : num 0
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : NULL
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ legend.title.position : NULL
## $ legend.position : chr "right"
## $ legend.position.inside : NULL
## $ legend.direction : NULL
## $ legend.byrow : NULL
## $ legend.justification : chr "center"
## $ legend.justification.top : NULL
## $ legend.justification.bottom : NULL
## $ legend.justification.left : NULL
## $ legend.justification.right : NULL
## $ legend.justification.inside : NULL
## [list output truncated]
## @ complete: logi TRUE
## @ validate: logi TRUE
library(ggplot2)
ggplot(mpg, aes(x = class, y = cty, fill = class)) +
geom_violin(alpha = 0.4, color = "pink") + # transparent violin
geom_boxplot(width = 0.1, fill = "blue", alpha = 0.7, outlier.shape = 16) + # overlay boxplot
labs(
title = "City MPG by Vehicle Class",
subtitle = "Violin plots showing distributions with boxplots for summary statistics",
x = "Vehicle Class",
y = "City MPG",
caption = "Source: ggplot2 mpg dataset"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none"
)
Questions:
The violin plot adds more information because it shows the density of the data. The boxplot does not show the full shape of the distribution.
The subcmpact class has a widespread of MPG values. The compact shows many outlier above the main distrbution. The pickup shows narrow and skewed toward lower MPG.
ggplot(diamonds, aes(x = cut))+
geom_bar () +
labs(title = "Number of Diamonds by Cut",
x = "cut",
y = "count")
ggplot(data = diamonds, aes(x = cut, fill = color)) +
geom_bar (position = "stack")
ggplot(data = diamonds, aes(x = cut, fill = color)) +
geom_bar(position = "dodge")
Questions:
A stacked barplot shows two varibales at the same tim including the totaly number of diamonds for each cut and how those diamonds are distributed across colors. A simple barplot only shows the total number of diamonds for each cut.
Dataset: diamonds
library(dplyr)
library(ggplot2)
avg_price_cut <- diamonds %>%
group_by(cut) %>%
summarise(avg_price = mean(price))
avg_price_cut <- avg_price_cut %>%
mutate(cut = reorder(cut, avg_price))
ggplot(avg_price_cut, aes(x = cut, y = avg_price)) +
geom_col(fill = "purple") +
labs(
title = "Average Diamond Price by Cut",
subtitle = "Using the diamonds dataset",
x = "Diamond Cut",
y = "Average Price (USD)",
caption = "Source: ggplot2 diamonds dataset"
) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Create the same plot using the stat_summary approach. Add error bars to the plot.
ggplot(diamonds, aes(x = reorder(cut, price, FUN = mean), y = price, fill = clarity)) +
stat_summary(fun = mean, geom = "col", position = "dodge") +
stat_summary(
fun.data = mean_se,
geom = "errorbar",
position = position_dodge(width = 0.9),
width = 0.2
) +
labs(
title = "Average Diamond Price by Cut and Clarity",
subtitle = "Means calculated using stat_summary() with standard error bars",
x = "Diamond Cut",
y = "Average Price (USD)",
fill = "Clarity",
caption = "Source: ggplot2 diamonds dataset"
) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))