# Load libraries
library(tidyverse)
library(ggplot2)
library(gt)
library(glue)
library(ggtext)
# Read the tidy (long-format) Anscombe data from disk into a tibble.
# Later steps use its `dataset`, `x` and `y` columns.
anscombe_tidy <- read_csv(file = "anscombes.csv")
Before I plot my anscombe_tidy dataframe, I need to prepare the summary statistics that I’ll use as labels. I can calculate them with the functions group_by() and summarise(), and then store the results in a new dataframe, anscombe_summary.
# Create dataframe with summary statistics.
# One row per dataset: means and standard deviations of x and y, plus the
# coefficient of determination, all rounded to two decimals for display.
anscombe_summary <- anscombe_tidy %>%
  group_by(dataset) %>%
  summarise(
    mean_x = mean(x),
    mean_y = mean(y),
    sd_x = sd(x),
    sd_y = sd(y),
    # cor() returns Pearson's r; square it so the column really holds r^2.
    r_square = cor(x, y)^2
  ) %>%
  # across() + where() replaces the superseded mutate_if().
  mutate(across(where(is.numeric), ~ round(.x, digits = 2)))
anscombe_summary
## # A tibble: 4 x 6
## dataset mean_x mean_y sd_x sd_y r_square
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 I 9 7.5 3.32 2.03 0.67
## 2 II 9 7.5 3.32 2.03 0.67
## 3 III 9 7.5 3.32 2.03 0.67
## 4 IV 9 7.5 3.32 2.03 0.67
Now that I’ve calculated the summary statistics I’ll need, I can go ahead and plot Anscombe’s quartet using the function ggplot(). I can show the quartet as a series of small multiples with the function facet_wrap() and label each plot with the function geom_richtext().
# Plot Anscombe's quartet and label with summary statistics.
# Each facet shows one dataset with these layers:
#   - the raw points, coloured by dataset
#   - the mean as a large black dot, and mean +/- one sd as black squares
#   - a linear fit with its confidence band
#   - three rich-text labels: r squared, the mean, and the sd
# Every summary value is read from a column of anscombe_summary (the `data`
# of that layer) instead of from free-floating global vectors, so the values
# always stay aligned with the facet they belong to.
anscombe_tidy %>%
  ggplot(aes(x, y)) +
  geom_point(aes(color = dataset)) +
  geom_point(
    data = anscombe_summary,
    aes(mean_x, mean_y),
    color = "black", size = 4, shape = 19
  ) +
  geom_point(
    data = anscombe_summary,
    aes(mean_x + sd_x, mean_y + sd_y),
    color = "black", size = 3, shape = 15
  ) +
  geom_point(
    data = anscombe_summary,
    aes(mean_x - sd_x, mean_y - sd_y),
    color = "black", size = 3, shape = 15
  ) +
  # Explicit formula gives the same fit without the "using formula" message.
  geom_smooth(method = "lm", formula = y ~ x, se = TRUE, color = "blue") +
  # Label y positions are anchored on mean_y (previously mean_x by mistake);
  # the offsets keep the labels at y = 14 and y = 13 for this data.
  geom_richtext(
    data = anscombe_summary,
    aes(
      x = mean_x + 7,
      y = mean_y + 6.5,
      color = dataset,
      # ggtext renders <sup>; a bare "^" would be printed literally.
      label = glue("r<sup>2</sup> = {r_square}"),
      # Fill the label box with a translucent version of its outline colour.
      fill = after_scale(alpha(colour, .2))
    )
  ) +
  geom_richtext(
    data = anscombe_summary,
    aes(
      x = mean_x,
      y = mean_y + 5.5,
      color = dataset,
      label = glue("mean = <br> ({mean_x}, {mean_y})"),
      fill = after_scale(alpha(colour, .2))
    )
  ) +
  geom_richtext(
    data = anscombe_summary,
    aes(
      x = sd_x + 10,
      y = sd_y + 3,
      color = dataset,
      label = glue(" sd = <br> +-({sd_x}, {sd_y})"),
      fill = after_scale(alpha(colour, .2))
    )
  ) +
  theme(legend.position = "none") +
  facet_wrap(~ dataset, ncol = 2) +
  labs(
    title = "Anscombe's Quartet",
    x = "X",
    y = "Y",
    subtitle = "Demonstrates why visualization is necessary",
    caption = "4 Data Sets"
  )