# Load required libraries
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(patchwork) # For combining plots
# Reshape Anscombe's quartet into long format
data(anscombe)
df <- anscombe
# Convert to long format for easier plotting
anscombe_long <- tibble(
x = c(df$x1, df$x2, df$x3, df$x4),
y = c(df$y1, df$y2, df$y3, df$y4),
set = rep(c("Set 1", "Set 2", "Set 3", "Set 4"), each = 11)
)
# Create base plot function
plot_lm <- function(data, set_name) {
ggplot(data, aes(x = x, y = y)) +
geom_point(shape = 16, color = "black") +
geom_smooth(method = "lm", se = FALSE, color = "blue") +
labs(title = paste(" ", set_name), x = "x", y = "y") +
theme_minimal()
}
# Split by set and make plots
plots <- anscombe_long %>%
group_split(set) %>%
purrr::map2(unique(anscombe_long$set), ~plot_lm(.x, .y))
# Arrange in 2x2 panel
(plots[[1]] | plots[[2]]) / (plots[[3]] | plots[[4]])
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
(Notice error messages indicate no loess plot for Data Set 4 because of so many redundant x values)
ggplot(anscombe_long, aes(x = x, y = y)) +
geom_point(size = 2) +
geom_smooth(method = "loess", se = FALSE, color = "darkred", linewidth = 1) +
facet_wrap(~set, nrow = 2) +
labs(title = "Anscombe: Loess Smoothing", x = "x", y = "y") +
theme_minimal(base_size = 14)
## `geom_smooth()` using formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : at 7.945
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : radius 0.003025
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 7.945
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 0.055
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 122.21
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : zero-width neighborhood. make span bigger
## Warning: Failed to fit group -1.
## Caused by error in `predLoess()`:
## ! NA/NaN/Inf in foreign function call (arg 5)
expdata <- read.table("ahbloessexampleSIM.txt",header=T)
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
describe(expdata)
## vars n mean sd median trimmed mad min max range skew kurtosis
## enhance 1 1904 13.89 4.01 14 14.09 4.45 4 21 17 -0.40 -0.39
## alcohol 2 1904 5.14 2.41 5 5.07 2.97 1 10 9 0.26 -0.77
## se
## enhance 0.09
## alcohol 0.06
#Install lavaan if needed
if (!require("cowplot", character.only = TRUE)) {
install.packages("cowplot")
library(cowplot, character.only = TRUE)
}
## Loading required package: cowplot
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:patchwork':
##
## align_plots
library(cowplot)
ggplot(expdata, aes(x=alcohol, y=enhance, se=TRUE))+geom_jitter()+
stat_smooth()+
background_grid(major = c("xy"),
minor = c("none"), size.major = 0.2,
colour.major = "grey90", colour.minor = "black98")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# Figure 5.4 First Anscombe Dataset with Confidence Interval
# Extract the first dataset (x1 and y1)
anscombe_df <- data.frame(x = anscombe$x1, y = anscombe$y1)
ggplot(anscombe_df, aes(x = x, y = y)) +
geom_point(color = "black") +
geom_smooth(method = "lm", se = TRUE, color = "blue", fill = "lightblue", formula = y ~ x) +
labs(
title = "Linear Regression with 95% Confidence Band",
x = "x1",
y = "y1"
) +
theme_minimal()
# Calculate centroid (mean x, mean y) for each group
centroids <- anscombe_long %>%
group_by(set) %>%
summarize(cx = mean(x), cy = mean(y))
# Join centroids to original data
df_centroid <- anscombe_long %>%
left_join(centroids, by = "set")
# Plot with segments to centroid
ggplot(df_centroid, aes(x = x, y = y)) +
geom_segment(aes(xend = cx, yend = cy), alpha = 0.4, color = "gray") +
geom_point(size = 2, color = "black") +
geom_point(aes(x = cx, y = cy), color = "red", size = 3, shape = 16) + # red dot
facet_wrap(~set, nrow = 2) +
labs(title = "Anscombe: Deviation from Centroid", x = "x", y = "y") +
scale_x_continuous(breaks = c(4, 8, 12, 16, 20)) +
scale_y_continuous(breaks = c(3, 5, 7, 9, 11, 13)) +
theme_minimal(base_size = 14)