# Make sure ggplot2 is available, installing it first if it is missing.
# requireNamespace() only tests whether the package can be loaded; the
# library() call below attaches it and stops with an error if the
# installation did not succeed. (The previous fallback,
# library(ggplot2, character.only = TRUE), tried to evaluate `ggplot2` as a
# variable and failed on exactly the path where the package had just been
# installed.)
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
# Endpoints of three horizontal arrows, one per row, each drawn at its own
# height. Every arrow ends at x = 2; the tails sit at x = 1, 0.5 and 3.5, so
# the first two arrows point right (signed lengths 1 and 1.5) and the third
# points left (signed length -1.5).
arrow_data <- local({
  tail_x <- c(1, 0.5, 3.5)
  height <- c(3, 2, 1)
  data.frame(
    x = tail_x,
    y = height,
    xend = rep(2, length(tail_x)),
    yend = height
  )
})
# Draw every row of arrow_data as an arrow from (x, y) to (xend, yend).
ggplot(arrow_data, aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_segment(
    linewidth = 1.5,
    arrow = arrow(length = unit(0.2, "inches"))
  ) +
  labs(title = "Vectors with length 1, 1.5 and -1.5") +
  xlim(-1, 4) +
  ylim(0, 4) +
  theme_minimal(base_size = 14) +
  # Applied after theme_minimal() so the complete theme does not undo it.
  theme(axis.title = element_blank())
# Figure 6.2: Various vector representations of correlations
library(ggplot2)
# Make sure dplyr is available, installing it first if it is missing.
# requireNamespace() only tests for the package; library() attaches it and
# errors if the installation failed. (The previous fallback,
# library(dplyr, character.only = TRUE), tried to evaluate `dplyr` as a
# variable and failed right after a fresh install.)
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
# Attaching dplyr masks stats::filter() and stats::lag(), as well as the base
# set functions intersect(), setdiff(), setequal() and union().
library(dplyr)
# Correlations to illustrate; each value gets its own facet label.
correlations <- c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 1)
# One row per correlation r. Both vectors start at the origin (x1, y1):
#   * (x2a, y2a) is the tip of a unit vector at angle 0, i.e. (1, 0);
#   * (x2b, y2b) is the tip of a unit vector at angle acos(r), so the cosine
#     of the angle between the two vectors equals r.
# All operations are vectorised over `correlations`, so the table is built in
# a single data.frame() call.
vector_data <- data.frame(
  set = paste0("r = ", correlations),
  x1 = 0,
  y1 = 0,
  x2a = cos(0),
  y2a = sin(0),
  x2b = cos(acos(correlations)),
  y2b = sin(acos(correlations))
)
# One facet per correlation. Both arrows start at (x1, y1); the shared tail is
# mapped once in ggplot() and each layer supplies only its own tip.
ggplot(vector_data, aes(x = x1, y = y1)) +
  # Reference vector at angle 0.
  geom_segment(
    aes(xend = x2a, yend = y2a),
    color = "blue",
    linewidth = 1,
    arrow = arrow(length = unit(0.2, "inches"))
  ) +
  # Vector at angle acos(r) to the reference vector.
  geom_segment(
    aes(xend = x2b, yend = y2b),
    color = "red",
    linewidth = 1,
    arrow = arrow(length = unit(0.2, "inches"))
  ) +
  facet_wrap(vars(set), ncol = 4) +
  # Equal scaling on both axes so the drawn angles are not distorted.
  coord_fixed() +
  scale_x_continuous(breaks = c(0, 1)) +
  scale_y_continuous(breaks = c(0, 0.33, 0.66, 1)) +
  theme_minimal(base_size = 14) +
  labs(
    title = "Vector Angles Representing Correlations",
    x = "X",
    y = "Y"
  )
library(ggplot2)
# grid is one of the base packages shipped with every R installation, so it
# only needs to be attached. It is not distributed through CRAN, which means
# the previous install.packages("grid") fallback could never have succeeded.
library(grid)
# Angle whose cosine is the correlation being illustrated (0.5 here; other
# correlations can be substituted).
theta <- acos(0.5)
# v1 lies along the x-axis; v2 is a unit vector at angle theta from it.
v1 <- c(1, 0)
v2 <- c(cos(theta), sin(theta))
v_sum <- v1 + v2
# Head-to-tail layout, one row per arrow:
#   v1      : origin -> v1
#   v2      : v1     -> v1 + v2 (drawn from the tip of v1)
#   v1 + v2 : origin -> v_sum
vectors <- local({
  origin <- c(0, 0)
  tails <- list(origin, v1, origin)
  heads <- list(v1, v1 + v2, v_sum)
  # Pull coordinate `i` (1 = x, 2 = y) out of every point in a list.
  coord <- function(points, i) {
    vapply(points, function(p) p[i], numeric(1))
  }
  data.frame(
    x = coord(tails, 1),
    y = coord(tails, 2),
    xend = coord(heads, 1),
    yend = coord(heads, 2),
    label = c("v1", "v2", "v1 + v2")
  )
})
# Plot the head-to-tail vector addition: one arrow per row of `vectors`,
# coloured by its label.
ggplot(vectors) +
  geom_segment(
    aes(x = x, y = y, xend = xend, yend = yend, color = label),
    arrow = arrow(length = unit(0.2, "inches")),
    linewidth = 1.2
  ) +
  # Equal scaling on both axes so the angle between the vectors is not
  # distorted.
  coord_fixed() +
  # Colours are matched by name. An unnamed vector is assigned in the order
  # of the sorted labels ("v1", "v1 + v2", "v2"), which would draw the sum in
  # red and v2 in black. `breaks` lists the legend entries in the order
  # v1, v2, sum.
  scale_color_manual(
    values = c("v1" = "blue", "v2" = "red", "v1 + v2" = "black"),
    breaks = c("v1", "v2", "v1 + v2")
  ) +
  xlim(0, 2.5) +
  ylim(0, 1.5) +
  labs(
    title = "Vector Addition (Head-to-Tail): Correlation = 0.5",
    x = "X",
    y = "Y"
  ) +
  theme_minimal(base_size = 14)
library(ggplot2)
# Make sure ggforce (which provides geom_arc()) is available, installing it
# first if it is missing. The previous fallback attached grid instead of
# ggforce after installing.
if (!requireNamespace("ggforce", quietly = TRUE)) {
  install.packages("ggforce")
}
library(ggforce)
# Vectors for the regression diagram.
# NOTE(review): v_x has length sqrt(0.5) and makes a 45-degree angle with v_y
# (cosine ~ 0.707), so these are not two unit vectors with r = 0.5 as
# previously described -- confirm the intended values.
v_x <- c(0.5, 0.5) # Predictor X
v_y <- c(0, 1) # Outcome Y
# Difference between the two vectors; with these values it is perpendicular
# to v_x.
error <- v_y - v_x
# One row per arrow, as (x, y, xend, yend):
#   X        : origin -> v_x
#   Y        : origin -> v_y
#   residual : v_x    -> v_y
df <- setNames(
  as.data.frame(rbind(c(0, 0, v_x), c(0, 0, v_y), c(v_x, v_y))),
  c("x", "y", "xend", "yend")
)
df$label <- c("X (Predictor)", "Y (Outcome)", "Error (Residual)")
# Angle between v_x and v_y from the cosine formula
# (dot product divided by the product of the lengths).
theta_rad <- acos(
  sum(v_x * v_y) / (sqrt(sum(v_x^2)) * sqrt(sum(v_y^2)))
)
# The same angle in degrees, rounded to one decimal place.
theta_deg <- round(theta_rad * 180 / pi, 1)
# Regression diagram: the three arrows in `df` plus an arc marking the angle
# between the Y and X vectors.
ggplot() +
  # Vectors. `linewidth` replaces `size`, which is deprecated for lines since
  # ggplot2 3.4.0 and produced a warning.
  geom_segment(
    data = df,
    aes(x = x, y = y, xend = xend, yend = yend, color = label),
    arrow = arrow(length = unit(0.2, "inches")),
    linewidth = 1.2
  ) +
  # Angle arc. ggforce measures arc angles clockwise from 12 o'clock, so
  # start = 0 lies along the Y vector and the arc sweeps theta_rad towards
  # the X vector.
  geom_arc(
    aes(x0 = 0, y0 = 0, r = 0.3, start = 0, end = theta_rad),
    color = "black"
  ) +
  # Theta label just outside the middle of the arc, using the same angle
  # convention. The label is a plotmath string: parse = TRUE expects
  # character input, and passing expression(theta) raised an is.na() warning.
  annotate(
    "text",
    x = 0.4 * sin(theta_rad / 2),
    y = 0.4 * cos(theta_rad / 2),
    label = "theta",
    parse = TRUE,
    size = 5
  ) +
  # Theme and formatting
  coord_fixed() +
  labs(
    title = "Vector Diagram: Regression of Y on X (r = 0.5)",
    subtitle = "Error vector from X to Y",
    x = NULL,
    y = NULL
  ) +
  theme_minimal(base_size = 14) +
  theme(legend.title = element_blank())
# Make sure plotly is available, installing it first if it is missing.
# requireNamespace() only tests for the package; library() attaches it and
# errors if the installation failed. (The previous fallback,
# library(plotly, character.only = TRUE), tried to evaluate `plotly` as a
# variable and failed right after a fresh install.)
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}
# Attaching plotly masks ggplot2::last_plot(), stats::filter() and
# graphics::layout().
library(plotly)
# Two correlated predictors, each observed on three cases.
X1 <- c(1, 0, 1)
X2 <- c(0.5, 1, 1.5)
# Observed Y: linear combination of the predictors plus a small error.
Y <- 0.5 * X1 + 0.5 * X2 + c(0.1, -0.1, 0.2)
# Design matrix (no intercept column) and least-squares coefficients.
# The normal equations (X'X) beta = X'Y are solved directly rather than by
# forming the explicit inverse of X'X, which is less accurate and does more
# work. `beta` is a 2 x 1 matrix with rows X1 and X2.
X <- cbind(X1, X2)
beta <- solve(crossprod(X), crossprod(X, Y))
beta1 <- beta[1]
beta2 <- beta[2]
# Components of the fitted vector contributed by each predictor.
vec1 <- beta1 * X1
vec2 <- beta2 * X2
Y_hat <- vec1 + vec2
resid_vec <- Y - Y_hat # residual vector, orthogonal to X1 and X2
# Add one vector to a plotly 3D figure as a line segment with a marker at
# each end.
#   fig   : plotly figure to add the trace to.
#   to    : length-3 numeric, coordinates of the head of the vector.
#   name  : legend entry for the trace.
#   color : colour used for both the line and the markers.
#   width : line width.
#   dash  : optional plotly dash style (e.g. "dot", "dash"); NULL leaves the
#           line solid.
#   from  : length-3 numeric, coordinates of the tail (defaults to origin).
# Returns the figure with the new trace added.
# scatter3d markers have no arrow-shaped symbols ("arrow-bar" is not among
# the values accepted for 3D traces), so the default marker symbol is used.
add_vector_trace <- function(fig, to, name, color, width, dash = NULL,
                             from = c(0, 0, 0)) {
  line_style <- list(color = color, width = width)
  if (!is.null(dash)) {
    line_style$dash <- dash
  }
  add_trace(
    fig,
    x = c(from[1], to[1]),
    y = c(from[2], to[2]),
    z = c(from[3], to[3]),
    type = "scatter3d",
    mode = "lines+markers",
    name = name,
    line = line_style,
    marker = list(size = 4, color = color)
  )
}

# Create 3D plot. Each vector has one element per observation, and the
# x/y/z plot coordinates are elements 1, 2 and 3 of that vector.
# NOTE(review): the axis titles below name the variables (X1, X2, Y) although
# each axis carries one observation -- confirm the intended labelling.
fig <- plot_ly() %>%
  # β₁·X₁ vector from the origin
  add_vector_trace(vec1, name = "β₁·X₁", color = "blue", width = 6) %>%
  # β₂·X₂ vector from the tip of β₁·X₁ to Ŷ
  add_vector_trace(
    Y_hat,
    name = "β₂·X₂", color = "green", width = 6, from = vec1
  ) %>%
  # Predicted Y vector (Ŷ)
  add_vector_trace(
    Y_hat,
    name = "Ŷ", color = "red", width = 4, dash = "dot"
  ) %>%
  # Observed Y vector
  add_vector_trace(Y, name = "Observed Y", color = "black", width = 4) %>%
  # Residual vector (Y - Ŷ), drawn from Ŷ to Y
  add_vector_trace(
    Y,
    name = "Residual", color = "purple", width = 4, dash = "dash",
    from = Y_hat
  ) %>%
  # Axes & layout
  layout(
    title = "Vector Diagram of Multiple Regression (Y on Vertical Axis)",
    scene = list(
      xaxis = list(title = "X₁"),
      yaxis = list(title = "X₂"),
      zaxis = list(title = "Y (Outcome)")
    )
  )
fig