# Load packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(gt)
library(glue)
library(ggtext)
# Read the Anscombe quartet data from a local CSV file.
# NOTE(review): the path is specific to one user's Desktop; the script will
# not run elsewhere without adjusting it.
anscombe_csv_path <- "~/Desktop/2023-06-06_Glover_Wk4SmallMultiples/Week4HW/anscombe-data.csv"
AnscombeData.df <- read.csv(file = anscombe_csv_path)
# Print the full data frame for inspection.
AnscombeData.df
## id series x y
## 1 1 1 10 8.04
## 2 2 1 8 6.95
## 3 3 1 13 7.58
## 4 4 1 9 8.81
## 5 5 1 11 8.33
## 6 6 1 14 9.96
## 7 7 1 6 7.24
## 8 8 1 4 4.26
## 9 9 1 12 10.84
## 10 10 1 7 4.82
## 11 11 1 5 5.68
## 12 12 2 10 9.14
## 13 13 2 8 8.14
## 14 14 2 13 8.74
## 15 15 2 9 8.77
## 16 16 2 11 9.26
## 17 17 2 14 8.10
## 18 18 2 6 6.13
## 19 19 2 4 3.10
## 20 20 2 12 9.13
## 21 21 2 7 7.26
## 22 22 2 5 4.74
## 23 23 3 10 7.46
## 24 24 3 8 6.77
## 25 25 3 13 12.74
## 26 26 3 9 7.11
## 27 27 3 11 7.81
## 28 28 3 14 8.84
## 29 29 3 6 6.08
## 30 30 3 4 5.39
## 31 31 3 12 8.15
## 32 32 3 7 6.42
## 33 33 3 5 5.73
## 34 34 4 8 6.58
## 35 35 4 8 5.76
## 36 36 4 8 7.71
## 37 37 4 8 8.84
## 38 38 4 8 8.47
## 39 39 4 8 7.04
## 40 40 4 8 5.25
## 41 41 4 19 12.50
## 42 42 4 8 5.56
## 43 43 4 8 7.91
## 44 44 4 8 6.89
# Coerce the identifier and series columns to integer in a single step.
integer_columns <- c("id", "series")
AnscombeData.df[integer_columns] <- lapply(
  AnscombeData.df[integer_columns],
  as.integer
)
# Preview the first rows of the data frame.
head(AnscombeData.df)
## id series x y
## 1 1 1 10 8.04
## 2 2 1 8 6.95
## 3 3 1 13 7.58
## 4 4 1 9 8.81
## 5 5 1 11 8.33
## 6 6 1 14 9.96
# View summary statistics for every column of the data frame.
AnscombeData.df %>%
  summary()
## id series x y
## Min. : 1.00 Min. :1.00 Min. : 4 Min. : 3.100
## 1st Qu.:11.75 1st Qu.:1.75 1st Qu.: 7 1st Qu.: 6.117
## Median :22.50 Median :2.50 Median : 8 Median : 7.520
## Mean :22.50 Mean :2.50 Mean : 9 Mean : 7.501
## 3rd Qu.:33.25 3rd Qu.:3.25 3rd Qu.:11 3rd Qu.: 8.748
## Max. :44.00 Max. :4.00 Max. :19 Max. :12.740
# Per-series summary statistics plus the least-squares regression line of
# y on x. Result columns (in order): series, mean_x, sd_x, mean_y, sd_y,
# r_squared, intercept, slope.
summary_stats <- AnscombeData.df %>%
  group_by(series) %>%
  summarize(
    mean_x = mean(x),
    sd_x = sd(x),
    mean_y = mean(y),
    sd_y = sd(y),
    # Keep the signed correlation: the slope needs r itself, not r^2.
    r = cor(x, y)
  ) %>%
  mutate(
    r_squared = r^2,
    # OLS slope is r * sd_y / sd_x (using r^2 here understates the slope).
    slope = r * sd_y / sd_x,
    # The fitted line passes through (mean_x, mean_y).
    intercept = mean_y - slope * mean_x
  ) %>%
  # Drop the helper column and restore the original column order.
  select(series, mean_x, sd_x, mean_y, sd_y, r_squared, intercept, slope)
summary_stats
## # A tibble: 4 × 8
## series mean_x sd_x mean_y sd_y r_squared intercept slope
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 9 3.32 7.50 2.03 0.667 3.00 0.500
## 2 2 9 3.32 7.50 2.03 0.666 3.00 0.500
## 3 3 9 3.32 7.5 2.03 0.666 3.00 0.500
## 4 4 9 3.32 7.50 2.03 0.667 3.00 0.500
# Build the per-panel annotation text once, from the summary table:
# a statistics block and the fitted-line equation for each series.
panel_labels <- summary_stats %>%
  mutate(
    stats_label = paste0(
      "Mean X: ", round(mean_x, 2),
      "\nMean Y: ", round(mean_y, 2),
      "\nSD X: ", round(sd_x, 2),
      "\nSD Y: ", round(sd_y, 2),
      "\nR^2: ", round(r_squared, 2)
    ),
    equation_label = paste0(
      "y = ", round(intercept, 2), " + ", round(slope, 2), "x"
    )
  )

# Small-multiples scatter plot: one panel per series, each with a linear fit,
# a statistics label (top-right) and the line equation (bottom-right).
SPData <- ggplot(data = AnscombeData.df, mapping = aes(x = x, y = y)) +
  geom_point() +
  facet_wrap(~ series, ncol = 2) +
  geom_smooth(method = "lm", se = FALSE, color = "darkblue") +
  # x/y = Inf with hjust/vjust = 1 pins the text to the top-right corner.
  geom_text(
    data = panel_labels,
    mapping = aes(label = stats_label),
    x = Inf,
    y = Inf,
    hjust = 1,
    vjust = 1,
    color = "black",
    size = 4
  ) +
  # y = -Inf with vjust = 0 pins the equation to the bottom-right corner.
  geom_text(
    data = panel_labels,
    mapping = aes(label = equation_label),
    x = Inf,
    y = -Inf,
    hjust = 1,
    vjust = 0,
    color = "black",
    size = 4
  ) +
  # NOTE(review): the vertical line is positioned at the fitted y-intercept
  # value (a y-axis quantity used as an x position) -- confirm this is intended.
  geom_vline(
    data = summary_stats,
    mapping = aes(xintercept = intercept),
    color = "black",
    linetype = "solid"
  ) +
  geom_hline(
    data = summary_stats,
    mapping = aes(yintercept = intercept),
    color = "black",
    linetype = "solid"
  ) +
  labs(title = "Anscombe Quartet") +
  theme(
    plot.title = element_text(
      family = "Times New Roman",
      face = "bold",
      hjust = 0.5
    )
  )
SPData
## `geom_smooth()` using formula = 'y ~ x'