# Point the session at the directory whose path is stored in `cd`; `cd` has to
# exist before this line runs, because the workspace is wiped immediately after.
setwd(dir = cd)
# Remove every (non-hidden) object from the workspace, `cd` included.
rm(list = ls(all.names = FALSE))
# Run the garbage collector; at top level this also prints the memory table.
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 518214 27.7 1145942 61.2 NA 669337 35.8
## Vcells 953920 7.3 8388608 64.0 16384 1839833 14.1
# Emit a form feed, which clears the console in front ends that honour it.
cat("\f")
# Close any open graphics devices. The original wrote `dev.off` without
# parentheses, which only printed the function's source instead of calling it.
# graphics.off() is used rather than dev.off() because dev.off() raises
# "cannot shut down device 1 (the null device)" when no device is open.
graphics.off()
# Packages this analysis needs; each one is installed on demand and attached.
packages <- c("psych", "tidyverse")
for (pkg in packages) {
  # requireNamespace() is a cheap "is it installed?" probe; the original
  # re-scanned the whole library with installed.packages() on every iteration.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(
      pkg,
      repos = "https://cran.rstudio.com/", # HTTPS instead of plain HTTP
      dependencies = TRUE
    )
  }
  # character.only = TRUE because the package name is held in a variable.
  library(pkg, character.only = TRUE)
}
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.1.0
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.4 ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%() masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# The package-name vector is only needed by the loading loop; drop it from the
# workspace now that the packages are attached.
rm(packages)
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
library(stargazer)
##
## Please cite as:
##
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
Raw data figures shown below.
# Read the training and evaluation sets from CSV files in the working directory.
df_train <- read.csv(file = "moneyball-training-data.csv")
df_eval <- read.csv(file = "moneyball-evaluation-data.csv")

# Per-column summary statistics (n, mean, sd, median, skew, ...) for the
# training data, produced by psych::describe().
psych::describe(df_train)
## vars n mean sd median trimmed mad min max
## INDEX 1 2276 1268.46 736.35 1270.5 1268.57 952.57 1 2535
## TARGET_WINS 2 2276 80.79 15.75 82.0 81.31 14.83 0 146
## TEAM_BATTING_H 3 2276 1469.27 144.59 1454.0 1459.04 114.16 891 2554
## TEAM_BATTING_2B 4 2276 241.25 46.80 238.0 240.40 47.44 69 458
## TEAM_BATTING_3B 5 2276 55.25 27.94 47.0 52.18 23.72 0 223
## TEAM_BATTING_HR 6 2276 99.61 60.55 102.0 97.39 78.58 0 264
## TEAM_BATTING_BB 7 2276 501.56 122.67 512.0 512.18 94.89 0 878
## TEAM_BATTING_SO 8 2174 735.61 248.53 750.0 742.31 284.66 0 1399
## TEAM_BASERUN_SB 9 2145 124.76 87.79 101.0 110.81 60.79 0 697
## TEAM_BASERUN_CS 10 1504 52.80 22.96 49.0 50.36 17.79 0 201
## TEAM_BATTING_HBP 11 191 59.36 12.97 58.0 58.86 11.86 29 95
## TEAM_PITCHING_H 12 2276 1779.21 1406.84 1518.0 1555.90 174.95 1137 30132
## TEAM_PITCHING_HR 13 2276 105.70 61.30 107.0 103.16 74.13 0 343
## TEAM_PITCHING_BB 14 2276 553.01 166.36 536.5 542.62 98.59 0 3645
## TEAM_PITCHING_SO 15 2174 817.73 553.09 813.5 796.93 257.23 0 19278
## TEAM_FIELDING_E 16 2276 246.48 227.77 159.0 193.44 62.27 65 1898
## TEAM_FIELDING_DP 17 1990 146.39 26.23 149.0 147.58 23.72 52 228
## range skew kurtosis se
## INDEX 2534 0.00 -1.22 15.43
## TARGET_WINS 146 -0.40 1.03 0.33
## TEAM_BATTING_H 1663 1.57 7.28 3.03
## TEAM_BATTING_2B 389 0.22 0.01 0.98
## TEAM_BATTING_3B 223 1.11 1.50 0.59
## TEAM_BATTING_HR 264 0.19 -0.96 1.27
## TEAM_BATTING_BB 878 -1.03 2.18 2.57
## TEAM_BATTING_SO 1399 -0.30 -0.32 5.33
## TEAM_BASERUN_SB 697 1.97 5.49 1.90
## TEAM_BASERUN_CS 201 1.98 7.62 0.59
## TEAM_BATTING_HBP 66 0.32 -0.11 0.94
## TEAM_PITCHING_H 28995 10.33 141.84 29.49
## TEAM_PITCHING_HR 343 0.29 -0.60 1.28
## TEAM_PITCHING_BB 3645 6.74 96.97 3.49
## TEAM_PITCHING_SO 19278 22.17 671.19 11.86
## TEAM_FIELDING_E 1833 2.99 10.97 4.77
## TEAM_FIELDING_DP 176 -0.39 0.18 0.59
# Compact preview of the evaluation set: one line per column showing its type
# and first few values.
glimpse(df_eval)
## Rows: 259
## Columns: 16
## $ INDEX <int> 9, 10, 14, 47, 60, 63, 74, 83, 98, 120, 123, 135, 138…
## $ TEAM_BATTING_H <int> 1209, 1221, 1395, 1539, 1445, 1431, 1430, 1385, 1259,…
## $ TEAM_BATTING_2B <int> 170, 151, 183, 309, 203, 236, 219, 158, 177, 212, 243…
## $ TEAM_BATTING_3B <int> 33, 29, 29, 29, 68, 53, 55, 42, 78, 42, 40, 55, 57, 2…
## $ TEAM_BATTING_HR <int> 83, 88, 93, 159, 5, 10, 37, 33, 23, 58, 50, 164, 186,…
## $ TEAM_BATTING_BB <int> 447, 516, 509, 486, 95, 215, 568, 356, 466, 452, 495,…
## $ TEAM_BATTING_SO <int> 1080, 929, 816, 914, 416, 377, 527, 609, 689, 584, 64…
## $ TEAM_BASERUN_SB <int> 62, 54, 59, 148, NA, NA, 365, 185, 150, 52, 64, 48, 3…
## $ TEAM_BASERUN_CS <int> 50, 39, 47, 57, NA, NA, NA, NA, NA, NA, NA, 28, 21, 8…
## $ TEAM_BATTING_HBP <int> NA, NA, NA, 42, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ TEAM_PITCHING_H <int> 1209, 1221, 1395, 1539, 3902, 2793, 1544, 1626, 1342,…
## $ TEAM_PITCHING_HR <int> 83, 88, 93, 159, 14, 20, 40, 39, 25, 62, 53, 173, 196…
## $ TEAM_PITCHING_BB <int> 447, 516, 509, 486, 257, 420, 613, 418, 497, 482, 521…
## $ TEAM_PITCHING_SO <int> 1080, 929, 816, 914, 1123, 736, 569, 715, 734, 622, 6…
## $ TEAM_FIELDING_E <int> 140, 135, 156, 124, 616, 572, 490, 328, 226, 184, 200…
## $ TEAM_FIELDING_DP <int> 156, 164, 153, 154, 130, 105, NA, 104, 132, 145, 183,…
# Number of years from 1871 through 2006, counting both endpoints.
# Stored once so the product below reuses it instead of a hand-typed 136.
n_years <- 2006 - 1871 + 1
print(x = n_years,
      digits = 1)
## [1] 136
# NOTE(review): 162 is presumably the number of games per season, making this
# a count of games per team over the whole span -- confirm.
n_years * 162
## [1] 22032
Finding missing data, visualizing it, and dropping or imputing it where necessary.
library(visdat)

# Overview of column types and missing cells in the raw training data.
vis_dat(df_train)

# Work on a copy so df_train keeps the raw values.
df_no_missing <- df_train

# TEAM_BATTING_HBP is present in only 191 of 2276 rows (see the describe()
# output), so the whole column is dropped.
df_no_missing$TEAM_BATTING_HBP <- NULL

# NOTE(review): the original also ran `df_no_missing$TEAM_BATTING_CS = NULL`.
# No column of that name exists (the data has TEAM_BASERUN_CS), so the
# statement silently did nothing. It is removed rather than "corrected",
# because TEAM_BASERUN_CS is imputed just below and used by the later models.

# Share of missing values per remaining column.
vis_miss(df_no_missing)

# Distribution of TEAM_BASERUN_CS before imputation.
hist(df_no_missing$TEAM_BASERUN_CS)

# Mean imputation: every missing TEAM_BASERUN_CS value is replaced by the mean
# of the observed values.
df_no_missing$TEAM_BASERUN_CS <- ifelse(
  test = is.na(x = df_no_missing$TEAM_BASERUN_CS),
  yes = mean(x = df_no_missing$TEAM_BASERUN_CS, na.rm = TRUE),
  no = df_no_missing$TEAM_BASERUN_CS
)

# Same distribution after imputation (base graphics, then ggplot2).
hist(df_no_missing$TEAM_BASERUN_CS)
ggplot(data = df_no_missing,
       mapping = aes(x = TEAM_BASERUN_CS)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(data = df_no_missing,
       mapping = aes(x = TEAM_BASERUN_CS)) +
  geom_histogram(color = "black",
                 fill = "blue",
                 binwidth = 10) +
  # Descriptive labels replace the original placeholders ("Histogram of X").
  labs(title = "Histogram of TEAM_BASERUN_CS (mean-imputed)",
       x = "TEAM_BASERUN_CS",
       y = "Count")

# Drop every row that still has a missing value in any remaining column.
df_no_missing <- na.omit(df_no_missing)
Identifying which variables are correlated with one another.
library(ggcorrplot)

# Pairwise correlations across all columns of the cleaned data
# (cor() defaults to Pearson).
mycorr <- cor(df_no_missing)

# Matching matrix of p-values for those correlations.
p.mat <- ggcorrplot::cor_pmat(df_no_missing)

# Show the first rows of the p-value matrix.
head(p.mat)
## INDEX TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B
## INDEX 0.000000000 3.085723e-01 2.870071e-01 1.012176e-01
## TARGET_WINS 0.308572334 0.000000e+00 1.013043e-54 2.490498e-20
## TEAM_BATTING_H 0.287007118 1.013043e-54 0.000000e+00 9.355168e-233
## TEAM_BATTING_2B 0.101217596 2.490498e-20 9.355168e-233 0.000000e+00
## TEAM_BATTING_3B 0.769316672 1.340586e-07 2.896946e-60 2.894398e-02
## TEAM_BATTING_HR 0.006420957 1.416443e-21 1.602541e-06 1.786146e-62
## TEAM_BATTING_3B TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO
## INDEX 7.693167e-01 6.420957e-03 2.259876e-02 7.106396e-07
## TARGET_WINS 1.340586e-07 1.416443e-21 6.788724e-40 1.082744e-02
## TEAM_BATTING_H 2.896946e-60 1.602541e-06 1.402162e-05 3.110665e-54
## TEAM_BATTING_2B 2.894398e-02 1.786146e-62 2.281359e-23 1.601577e-07
## TEAM_BATTING_3B 0.000000e+00 3.146352e-208 5.493361e-25 1.722999e-271
## TEAM_BATTING_HR 3.146352e-208 0.000000e+00 2.833152e-68 4.262615e-267
## TEAM_BASERUN_SB TEAM_BASERUN_CS TEAM_PITCHING_H
## INDEX 1.177395e-04 9.919907e-01 9.517913e-01
## TARGET_WINS 1.846788e-07 6.773671e-01 8.836481e-22
## TEAM_BATTING_H 1.497875e-02 6.096341e-01 7.485250e-277
## TEAM_BATTING_2B 9.931836e-07 9.663607e-06 6.214542e-71
## TEAM_BATTING_3B 8.653120e-30 2.117662e-35 5.645504e-73
## TEAM_BATTING_HR 1.327937e-41 9.273809e-63 2.113987e-05
## TEAM_PITCHING_HR TEAM_PITCHING_BB TEAM_PITCHING_SO
## INDEX 7.419790e-03 1.014059e-02 1.744343e-06
## TARGET_WINS 1.685830e-21 2.011167e-32 4.962297e-03
## TEAM_BATTING_H 4.769017e-08 3.435879e-08 7.730597e-49
## TEAM_BATTING_2B 1.151450e-63 2.211572e-17 5.592219e-07
## TEAM_BATTING_3B 2.009525e-190 1.798207e-07 2.195339e-225
## TEAM_BATTING_HR 0.000000e+00 2.039060e-29 1.157630e-220
## TEAM_FIELDING_E TEAM_FIELDING_DP
## INDEX 1.287249e-02 7.993341e-01
## TARGET_WINS 1.454931e-15 1.165960e-01
## TEAM_BATTING_H 2.969523e-04 1.173988e-11
## TEAM_BATTING_2B 9.295027e-35 1.901141e-14
## TEAM_BATTING_3B 2.596789e-244 7.211648e-21
## TEAM_BATTING_HR 4.191484e-271 2.530492e-36
# Heat map of the full correlation matrix: coefficients are printed in each
# cell (2 digits), variables are reordered by hierarchical clustering, and
# cells flagged as insignificant via p.mat are marked with plotting symbol 4.
myplot <- ggcorrplot(
  corr = mycorr,
  p.mat = p.mat,
  method = "square",
  type = "full",
  hc.order = TRUE,
  insig = "pch",
  pch = 4,
  lab = TRUE,
  lab_size = 2,
  digits = 2,
  colors = c("red", "white", "green"),
  tl.cex = 8,
  tl.col = "black",
  title = "Correlation Plot"
)
myplot
Box plots to find patterns in data.
# One box plot per variable, each in its own facet with free scales.
# pivot_longer() replaces the superseded gather(): the columns from
# TARGET_WINS through TEAM_FIELDING_DP are stacked into name/value pairs.
df_no_missing %>%
  pivot_longer(
    cols = TARGET_WINS:TEAM_FIELDING_DP,
    names_to = "variable",
    values_to = "value"
  ) %>%
  ggplot(aes(x = variable, y = value)) +
  geom_boxplot() +
  facet_wrap(~variable, scales = "free", ncol = 4) +
  # NULL is the documented way to remove an axis label in labs();
  # element_blank() is a theme element, not a label value.
  labs(x = NULL, y = NULL)
Filtering out the outlying observations (TARGET_WINS of 19 or fewer, or of 121 or more) so that they do not distort the larger aggregate data.
# Keep only rows whose TARGET_WINS lies strictly between 19 and 121;
# conditions passed to a single filter() call are combined with AND.
df_no_missing <- filter(
  df_no_missing,
  TARGET_WINS > 19,
  TARGET_WINS < 121
)
Reviewing prepped data set.
# Scatter plot of TARGET_WINS against every other column, one facet per
# variable, each with a fitted straight line (no confidence band).
# pivot_longer() replaces the superseded gather(): every column except
# TARGET_WINS is stacked into name/value pairs.
df_no_missing %>%
  pivot_longer(
    cols = -TARGET_WINS,
    names_to = "variable",
    values_to = "value"
  ) %>%
  ggplot(aes(x = value, y = TARGET_WINS)) +
  geom_point(fill = "blue", color = "blue") +
  geom_smooth(method = "lm", se = FALSE, color = "black") +
  facet_wrap(~variable, scales = "free", ncol = 4) +
  # NULL is the documented way to remove an axis label in labs().
  labs(x = NULL, y = "Wins")
## `geom_smooth()` using formula = 'y ~ x'
# Recompute the correlation matrix and its p-values from the current
# df_no_missing, overwriting the earlier mycorr and p.mat.
mycorr <- cor(df_no_missing)
p.mat <- ggcorrplot::cor_pmat(df_no_missing)

# Show the first rows of the p-value matrix.
head(p.mat)
## INDEX TARGET_WINS TEAM_BATTING_H TEAM_BATTING_2B
## INDEX 0.000000000 3.085723e-01 2.870071e-01 1.012176e-01
## TARGET_WINS 0.308572334 0.000000e+00 1.013043e-54 2.490498e-20
## TEAM_BATTING_H 0.287007118 1.013043e-54 0.000000e+00 9.355168e-233
## TEAM_BATTING_2B 0.101217596 2.490498e-20 9.355168e-233 0.000000e+00
## TEAM_BATTING_3B 0.769316672 1.340586e-07 2.896946e-60 2.894398e-02
## TEAM_BATTING_HR 0.006420957 1.416443e-21 1.602541e-06 1.786146e-62
## TEAM_BATTING_3B TEAM_BATTING_HR TEAM_BATTING_BB TEAM_BATTING_SO
## INDEX 7.693167e-01 6.420957e-03 2.259876e-02 7.106396e-07
## TARGET_WINS 1.340586e-07 1.416443e-21 6.788724e-40 1.082744e-02
## TEAM_BATTING_H 2.896946e-60 1.602541e-06 1.402162e-05 3.110665e-54
## TEAM_BATTING_2B 2.894398e-02 1.786146e-62 2.281359e-23 1.601577e-07
## TEAM_BATTING_3B 0.000000e+00 3.146352e-208 5.493361e-25 1.722999e-271
## TEAM_BATTING_HR 3.146352e-208 0.000000e+00 2.833152e-68 4.262615e-267
## TEAM_BASERUN_SB TEAM_BASERUN_CS TEAM_PITCHING_H
## INDEX 1.177395e-04 9.919907e-01 9.517913e-01
## TARGET_WINS 1.846788e-07 6.773671e-01 8.836481e-22
## TEAM_BATTING_H 1.497875e-02 6.096341e-01 7.485250e-277
## TEAM_BATTING_2B 9.931836e-07 9.663607e-06 6.214542e-71
## TEAM_BATTING_3B 8.653120e-30 2.117662e-35 5.645504e-73
## TEAM_BATTING_HR 1.327937e-41 9.273809e-63 2.113987e-05
## TEAM_PITCHING_HR TEAM_PITCHING_BB TEAM_PITCHING_SO
## INDEX 7.419790e-03 1.014059e-02 1.744343e-06
## TARGET_WINS 1.685830e-21 2.011167e-32 4.962297e-03
## TEAM_BATTING_H 4.769017e-08 3.435879e-08 7.730597e-49
## TEAM_BATTING_2B 1.151450e-63 2.211572e-17 5.592219e-07
## TEAM_BATTING_3B 2.009525e-190 1.798207e-07 2.195339e-225
## TEAM_BATTING_HR 0.000000e+00 2.039060e-29 1.157630e-220
## TEAM_FIELDING_E TEAM_FIELDING_DP
## INDEX 1.287249e-02 7.993341e-01
## TARGET_WINS 1.454931e-15 1.165960e-01
## TEAM_BATTING_H 2.969523e-04 1.173988e-11
## TEAM_BATTING_2B 9.295027e-35 1.901141e-14
## TEAM_BATTING_3B 2.596789e-244 7.211648e-21
## TEAM_BATTING_HR 4.191484e-271 2.530492e-36
# Rebuild the correlation heat map from the current mycorr / p.mat:
# labelled square tiles, clustered variable order, and plotting symbol 4 on
# cells that p.mat flags as insignificant.
myplot <- ggcorrplot(
  corr = mycorr,
  p.mat = p.mat,
  method = "square",
  type = "full",
  hc.order = TRUE,
  insig = "pch",
  pch = 4,
  lab = TRUE,
  lab_size = 2,
  digits = 2,
  colors = c("red", "white", "green"),
  tl.cex = 8,
  tl.col = "black",
  title = "Correlation Plot"
)
myplot
# Model 1: TARGET_WINS regressed on every other column of df_no_missing.
# NOTE(review): `.` also pulls in INDEX, which looks like a row identifier
# rather than a real predictor -- confirm whether it belongs in the model.
model1 <- lm(TARGET_WINS ~ ., data = df_no_missing)

# Model 2: a hand-picked three-predictor model.
model2 <- lm(
  TARGET_WINS ~ TEAM_PITCHING_H + TEAM_FIELDING_E + TEAM_PITCHING_BB,
  data = df_no_missing
)

# Model 3: backward stepwise selection by AIC (MASS::stepAIC), starting from
# the same full specification as model 1.
model3 <- stepAIC(
  object = lm(TARGET_WINS ~ ., data = df_no_missing),
  direction = "backward"
)
## Start: AIC=8526.62
## TARGET_WINS ~ INDEX + TEAM_BATTING_H + TEAM_BATTING_2B + TEAM_BATTING_3B +
## TEAM_BATTING_HR + TEAM_BATTING_BB + TEAM_BATTING_SO + TEAM_BASERUN_SB +
## TEAM_BASERUN_CS + TEAM_PITCHING_H + TEAM_PITCHING_HR + TEAM_PITCHING_BB +
## TEAM_PITCHING_SO + TEAM_FIELDING_E + TEAM_FIELDING_DP
##
## Df Sum of Sq RSS AIC
## - TEAM_PITCHING_HR 1 11.1 187978 8524.7
## - INDEX 1 21.5 187989 8524.8
## - TEAM_BATTING_SO 1 104.2 188071 8525.6
## <none> 187967 8526.6
## - TEAM_BATTING_HR 1 235.9 188203 8526.9
## - TEAM_BATTING_H 1 272.3 188239 8527.3
## - TEAM_PITCHING_BB 1 353.0 188320 8528.1
## - TEAM_PITCHING_SO 1 436.0 188403 8528.9
## - TEAM_BASERUN_CS 1 670.4 188637 8531.2
## - TEAM_BATTING_BB 1 716.8 188684 8531.6
## - TEAM_PITCHING_H 1 1312.2 189279 8537.4
## - TEAM_BATTING_2B 1 3237.3 191204 8556.0
## - TEAM_FIELDING_DP 1 8408.7 196376 8604.9
## - TEAM_BATTING_3B 1 9723.5 197691 8617.2
## - TEAM_BASERUN_SB 1 15739.9 203707 8672.2
## - TEAM_FIELDING_E 1 29328.6 217296 8790.7
##
## Step: AIC=8524.73
## TARGET_WINS ~ INDEX + TEAM_BATTING_H + TEAM_BATTING_2B + TEAM_BATTING_3B +
## TEAM_BATTING_HR + TEAM_BATTING_BB + TEAM_BATTING_SO + TEAM_BASERUN_SB +
## TEAM_BASERUN_CS + TEAM_PITCHING_H + TEAM_PITCHING_BB + TEAM_PITCHING_SO +
## TEAM_FIELDING_E + TEAM_FIELDING_DP
##
## Df Sum of Sq RSS AIC
## - INDEX 1 22.7 188001 8523.0
## <none> 187978 8524.7
## - TEAM_BATTING_SO 1 229.8 188208 8525.0
## - TEAM_BATTING_H 1 326.6 188305 8525.9
## - TEAM_PITCHING_BB 1 404.6 188383 8526.7
## - TEAM_BASERUN_CS 1 685.1 188663 8529.4
## - TEAM_BATTING_BB 1 799.6 188778 8530.5
## - TEAM_PITCHING_SO 1 822.9 188801 8530.7
## - TEAM_PITCHING_H 1 1494.3 189473 8537.3
## - TEAM_BATTING_2B 1 3235.3 191214 8554.0
## - TEAM_FIELDING_DP 1 8431.8 196410 8603.2
## - TEAM_BATTING_3B 1 9719.2 197697 8615.2
## - TEAM_BATTING_HR 1 11281.0 199259 8629.7
## - TEAM_BASERUN_SB 1 15854.4 203833 8671.3
## - TEAM_FIELDING_E 1 29390.8 217369 8789.3
##
## Step: AIC=8522.95
## TARGET_WINS ~ TEAM_BATTING_H + TEAM_BATTING_2B + TEAM_BATTING_3B +
## TEAM_BATTING_HR + TEAM_BATTING_BB + TEAM_BATTING_SO + TEAM_BASERUN_SB +
## TEAM_BASERUN_CS + TEAM_PITCHING_H + TEAM_PITCHING_BB + TEAM_PITCHING_SO +
## TEAM_FIELDING_E + TEAM_FIELDING_DP
##
## Df Sum of Sq RSS AIC
## <none> 188001 8523.0
## - TEAM_BATTING_SO 1 229.1 188230 8523.2
## - TEAM_BATTING_H 1 325.8 188327 8524.1
## - TEAM_PITCHING_BB 1 400.4 188401 8524.9
## - TEAM_BASERUN_CS 1 677.5 188679 8527.6
## - TEAM_BATTING_BB 1 795.1 188796 8528.7
## - TEAM_PITCHING_SO 1 825.2 188826 8529.0
## - TEAM_PITCHING_H 1 1488.2 189489 8535.4
## - TEAM_BATTING_2B 1 3223.9 191225 8552.2
## - TEAM_FIELDING_DP 1 8462.1 196463 8601.7
## - TEAM_BATTING_3B 1 9711.3 197712 8613.4
## - TEAM_BATTING_HR 1 11284.0 199285 8627.9
## - TEAM_BASERUN_SB 1 15856.4 203857 8669.5
## - TEAM_FIELDING_E 1 29396.6 217398 8787.5
# Side-by-side regression table for the three models, rendered as plain text.
stargazer(model1, model2, model3, type = "text")
##
## ================================================================================================
## Dependent variable:
## ----------------------------------------------------------------------------
## TARGET_WINS
## (1) (2) (3)
## ------------------------------------------------------------------------------------------------
## INDEX -0.0002
## (0.0003)
##
## TEAM_BATTING_H -0.027 -0.028*
## (0.017) (0.016)
##
## TEAM_BATTING_2B -0.050*** -0.050***
## (0.009) (0.009)
##
## TEAM_BATTING_3B 0.185*** 0.184***
## (0.019) (0.019)
##
## TEAM_BATTING_HR 0.123 0.097***
## (0.082) (0.009)
##
## TEAM_BATTING_BB 0.112*** 0.115***
## (0.042) (0.041)
##
## TEAM_BATTING_SO 0.022 0.026
## (0.022) (0.018)
##
## TEAM_BASERUN_SB 0.077*** 0.077***
## (0.006) (0.006)
##
## TEAM_BASERUN_CS -0.036** -0.036**
## (0.014) (0.014)
##
## TEAM_PITCHING_H 0.054*** 0.017*** 0.055***
## (0.015) (0.002) (0.014)
##
## TEAM_PITCHING_HR -0.026
## (0.078)
##
## TEAM_PITCHING_BB -0.075* 0.020*** -0.077**
## (0.040) (0.003) (0.039)
##
## TEAM_PITCHING_SO -0.043** -0.047***
## (0.021) (0.017)
##
## TEAM_FIELDING_E -0.121*** -0.048*** -0.120***
## (0.007) (0.005) (0.007)
##
## TEAM_FIELDING_DP -0.111*** -0.111***
## (0.012) (0.012)
##
## Constant 60.797*** 51.788*** 60.856***
## (6.066) (2.688) (6.057)
##
## ------------------------------------------------------------------------------------------------
## Observations 1,835 1,835 1,835
## R2 0.408 0.129 0.408
## Adjusted R2 0.403 0.128 0.404
## Residual Std. Error 10.165 (df = 1819) 12.289 (df = 1831) 10.161 (df = 1821)
## F Statistic 83.597*** (df = 15; 1819) 90.539*** (df = 3; 1831) 96.522*** (df = 13; 1821)
## ================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
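Models 1 and 3 produced similar results, but I would select Model 3 as the best model: it matches Model 1's R-squared of about 41% (0.408) with two fewer predictors, which gives it the highest adjusted R-squared (0.404), the lowest residual standard error (10.161), and the highest F-statistic (96.5). Model 1 comes in second place for similar reasons, and Model 2, with an R-squared of only about 13% (0.129), comes in third.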
# Draw the lm diagnostic plots for model3 on a single 2 x 2 page.
# The stray interactive help lookup (`?par`) was removed, and the previous
# graphics parameters are restored afterwards so later plots are not forced
# into the 2 x 2 layout.
old_par <- par(mfrow = c(2, 2))
plot(model3)
par(old_par)
The residuals vs. fitted plot shows the residuals evenly distributed around the line. The normal Q-Q plot follows a straight, upward-sloping line, which indicates that the residuals are approximately normally distributed (a straight Q-Q line signals normality, not a uniform distribution). The three plots that include a trend line (residuals vs. fitted, scale-location, and residuals vs. leverage) show the same trend, with the same slope in each, which suggests that the linear fit behaves consistently across the range of fitted values. However, one possible issue to investigate further is the clumping at the left of the residuals vs. leverage plot: there could be influential observations in the dataset that have a strong impact on the regression model.