# Load Packages

Load libraries

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(gt)
library(glue)
library(ggtext)

Load Data

# Read the Anscombe quartet CSV into a data frame.
# NOTE(review): the path is absolute and user-specific (~/Desktop/...), so the
# script only runs on a machine with this exact folder layout.
AnscombeData.df <- read.csv(
  file = "~/Desktop/2023-06-06_Glover_Wk4SmallMultiples/Week4HW/anscombe-data.csv"
)

Inspect the data, then convert the `id` and `series` columns to integers

# Show the full data set before any type conversion.
print(AnscombeData.df)
##    id series  x     y
## 1   1      1 10  8.04
## 2   2      1  8  6.95
## 3   3      1 13  7.58
## 4   4      1  9  8.81
## 5   5      1 11  8.33
## 6   6      1 14  9.96
## 7   7      1  6  7.24
## 8   8      1  4  4.26
## 9   9      1 12 10.84
## 10 10      1  7  4.82
## 11 11      1  5  5.68
## 12 12      2 10  9.14
## 13 13      2  8  8.14
## 14 14      2 13  8.74
## 15 15      2  9  8.77
## 16 16      2 11  9.26
## 17 17      2 14  8.10
## 18 18      2  6  6.13
## 19 19      2  4  3.10
## 20 20      2 12  9.13
## 21 21      2  7  7.26
## 22 22      2  5  4.74
## 23 23      3 10  7.46
## 24 24      3  8  6.77
## 25 25      3 13 12.74
## 26 26      3  9  7.11
## 27 27      3 11  7.81
## 28 28      3 14  8.84
## 29 29      3  6  6.08
## 30 30      3  4  5.39
## 31 31      3 12  8.15
## 32 32      3  7  6.42
## 33 33      3  5  5.73
## 34 34      4  8  6.58
## 35 35      4  8  5.76
## 36 36      4  8  7.71
## 37 37      4  8  8.84
## 38 38      4  8  8.47
## 39 39      4  8  7.04
## 40 40      4  8  5.25
## 41 41      4 19 12.50
## 42 42      4  8  5.56
## 43 43      4  8  7.91
## 44 44      4  8  6.89
# Coerce the two identifier columns (`id`, `series`) to integer in one step;
# `x` and `y` are left untouched.
AnscombeData.df[c("id", "series")] <- lapply(
  AnscombeData.df[c("id", "series")],
  as.integer
)
# Preview the first rows to confirm the data still looks as expected.
head(AnscombeData.df)
##   id series  x    y
## 1  1      1 10 8.04
## 2  2      1  8 6.95
## 3  3      1 13 7.58
## 4  4      1  9 8.81
## 5  5      1 11 8.33
## 6  6      1 14 9.96

### View Summary Data

# Column-wise five-number summary (plus mean) across all four series combined.
print(summary(AnscombeData.df))
##        id            series           x            y         
##  Min.   : 1.00   Min.   :1.00   Min.   : 4   Min.   : 3.100  
##  1st Qu.:11.75   1st Qu.:1.75   1st Qu.: 7   1st Qu.: 6.117  
##  Median :22.50   Median :2.50   Median : 8   Median : 7.520  
##  Mean   :22.50   Mean   :2.50   Mean   : 9   Mean   : 7.501  
##  3rd Qu.:33.25   3rd Qu.:3.25   3rd Qu.:11   3rd Qu.: 8.748  
##  Max.   :44.00   Max.   :4.00   Max.   :19   Max.   :12.740
# Per-series summary statistics for the Anscombe quartet, plus the coefficients
# of the least-squares line y = intercept + slope * x for each series.
#
# Resulting columns (one row per series):
#   mean_x, sd_x, mean_y, sd_y  - sample means and standard deviations
#   r_squared                   - squared Pearson correlation of x and y
#   intercept, slope            - ordinary least-squares coefficients
#
# The OLS slope is cov(x, y) / var(x), which equals r * sd_y / sd_x with r the
# correlation itself. Scaling by r^2 instead of r understates the slope and
# overstates the intercept, so the coefficients would not match the line that
# lm() fits; Anscombe's quartet is built so every series fits roughly
# y = 3 + 0.5x.
summary_stats <- AnscombeData.df %>%
  group_by(series) %>%
  summarize(
    mean_x = mean(x),
    sd_x = sd(x),
    mean_y = mean(y),
    sd_y = sd(y),
    r_squared = cor(x, y)^2,
    slope = cov(x, y) / var(x)
  ) %>%
  # The fitted line passes through (mean_x, mean_y), which pins the intercept.
  # `.before = slope` keeps the column order: ..., r_squared, intercept, slope.
  mutate(intercept = mean_y - slope * mean_x, .before = slope)
summary_stats
## # A tibble: 4 × 8
##   series mean_x  sd_x mean_y  sd_y r_squared intercept slope
##    <int>  <dbl> <dbl>  <dbl> <dbl>     <dbl>     <dbl> <dbl>
## 1      1      9  3.32   7.50  2.03     0.667      3.83 0.408
## 2      2      9  3.32   7.50  2.03     0.666      3.83 0.408
## 3      3      9  3.32   7.5   2.03     0.666      3.83 0.408
## 4      4      9  3.32   7.50  2.03     0.667      3.83 0.408

Create the four charts

# Small-multiples scatter plot of the Anscombe quartet: one panel per series in
# a 2-column grid, each with its points, a linear fit, and text annotations
# taken from `summary_stats`.
SPData <- ggplot(AnscombeData.df, aes(x = x, y = y)) +
  geom_point() +
  facet_wrap(~ series, ncol = 2) +
  # Straight-line fit per panel, without a confidence band.
  geom_smooth(method = "lm", se = FALSE, color = "darkblue") +
  # Summary-statistics label, anchored to the top-right corner of each panel
  # (x = Inf / y = Inf with hjust = 1 / vjust = 1).
  geom_text(
    data = summary_stats,
    aes(
      label = paste0(
        "Mean X: ", round(mean_x, 2),
        "\nMean Y: ", round(mean_y, 2),
        "\nSD X: ", round(sd_x, 2),
        "\nSD Y: ", round(sd_y, 2),
        "\nR^2: ", round(r_squared, 2)
      )
    ),
    x = Inf,
    y = Inf,
    hjust = 1,
    vjust = 1,
    color = "black",
    size = 3
  ) +
  # Regression-equation label, anchored to the bottom-right corner.
  geom_text(
    data = summary_stats,
    aes(
      label = paste0("y = ", round(intercept, 2), " + ", round(slope, 2), "x")
    ),
    x = Inf,
    y = -Inf,
    hjust = 1,
    vjust = 0,
    color = "black",
    size = 3
  ) +
  # Reference lines drawn at the value of `intercept` on both axes.
  # NOTE(review): `intercept` is a y-value, so using it as an x position for
  # the vertical line looks unintended - confirm what these lines should mark.
  geom_vline(
    data = summary_stats,
    aes(xintercept = intercept),
    color = "black",
    linetype = "solid"
  ) +
  geom_hline(
    data = summary_stats,
    aes(yintercept = intercept),
    color = "black",
    linetype = "solid"
  ) +
  labs(title = "Anscombe Quartet") +
  # Centered, bold title.
  theme(
    plot.title = element_text(
      family = "Times New Roman",
      face = "bold",
      hjust = 0.5
    )
  )
SPData
## `geom_smooth()` using formula = 'y ~ x'