# List of packages
packages <- c("tidyverse", "fst", "modelsummary", "broom", "sjPlot", "ggplot2", "car", "Lock5Data", "mosaic") # add any you need here
# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
# Load the packages
invisible(lapply(packages, library, character.only = TRUE)) # invisible() suppresses the printed list of attached packages
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
## backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
##
## Revert to `kableExtra` for one session:
##
## options(modelsummary_factory_default = 'kableExtra')
## options(modelsummary_factory_latex = 'kableExtra')
## options(modelsummary_factory_html = 'kableExtra')
##
## Silence this message forever:
##
## config_modelsummary(startup_message = FALSE)
##
## Learn more about sjPlot with 'browseVignettes("sjPlot")'.
##
## Loading required package: carData
##
##
## Attaching package: 'car'
##
##
## The following object is masked from 'package:dplyr':
##
## recode
##
##
## The following object is masked from 'package:purrr':
##
## some
##
##
## Registered S3 method overwritten by 'mosaic':
## method from
## fortify.SpatialPolygonsDataFrame ggplot2
##
##
## The 'mosaic' package masks several functions from core packages in order to add
## additional features. The original behavior of these functions should not be affected by this.
##
##
## Attaching package: 'mosaic'
##
##
## The following object is masked from 'package:Matrix':
##
## mean
##
##
## The following objects are masked from 'package:car':
##
## deltaMethod, logit
##
##
## The following object is masked from 'package:modelsummary':
##
## msummary
##
##
## The following objects are masked from 'package:dplyr':
##
## count, do, tally
##
##
## The following object is masked from 'package:purrr':
##
## cross
##
##
## The following object is masked from 'package:ggplot2':
##
## stat
##
##
## The following objects are masked from 'package:stats':
##
## binom.test, cor, cor.test, cov, fivenum, IQR, median, prop.test,
## quantile, sd, t.test, var
##
##
## The following objects are masked from 'package:base':
##
## max, mean, min, prod, range, sample, sum
sat_gpa <- read_csv("sat_gpa.csv")
## Rows: 1000 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): sex
## dbl (5): sat_verbal, sat_math, sat_total, gpa_hs, gpa_fy
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
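As the message above suggests, we could declare the column types up front so read_csv() skips the guessing step and the message disappears. A minimal sketch of that option, assuming the one character and five numeric columns listed in the specification above:
# Optional: declare column types explicitly when reading the data
sat_gpa <- read_csv("sat_gpa.csv",
                    col_types = cols(sex = col_character(),
                                     .default = col_double()))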
cor(sat_gpa$sat_math, sat_gpa$gpa_fy)
## [1] 0.3871178
The output shows a correlation of approximately 0.39, indicating a weak to moderate positive relationship between SAT math scores and freshman GPA.
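To see how precisely this correlation is estimated, we could also run cor.test(), which reports a 95% confidence interval alongside the point estimate (an optional check; its output is not reproduced here):
# Optional: test the correlation and obtain a 95% confidence interval
cor.test(sat_gpa$sat_math, sat_gpa$gpa_fy)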
ggplot(sat_gpa, aes(x = sat_math, y = gpa_fy)) +
  geom_point(size = 0.5) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(x = "Math SAT Score", y = "Freshman GPA")
## `geom_smooth()` using formula = 'y ~ x'
model_total <- lm(gpa_fy ~ sat_total, data = sat_gpa)
model_math <- lm(gpa_fy ~ sat_math, data = sat_gpa)
models <- list("Model math" = model_math, "Model total" = model_total)
modelsummary(models)
| | Model math | Model total |
|---|---|---|
| (Intercept) | 0.622 | 0.002 |
| | (0.141) | (0.152) |
| sat_math | 0.034 | |
| | (0.003) | |
| sat_total | | 0.024 |
| | | (0.001) |
| Num.Obs. | 1000 | 1000 |
| R2 | 0.150 | 0.212 |
| R2 Adj. | 0.149 | 0.211 |
| AIC | 2080.5 | 2004.8 |
| BIC | 2095.2 | 2019.5 |
| Log.Lik. | -1037.243 | -999.382 |
| RMSE | 0.68 | 0.66 |
tab_model(model_math, model_total, title = "Regression Analysis of SAT Scores and Freshman GPA")
| | gpa_fy | | | gpa_fy | | |
|---|---|---|---|---|---|---|
| Predictors | Estimates | CI | p | Estimates | CI | p |
| (Intercept) | 0.62 | 0.35 – 0.90 | <0.001 | 0.00 | -0.30 – 0.30 | 0.990 |
| sat math | 0.03 | 0.03 – 0.04 | <0.001 | | | |
| sat total | | | | 0.02 | 0.02 – 0.03 | <0.001 |
| Observations | 1000 | | | 1000 | | |
| R2 / R2 adjusted | 0.150 / 0.149 | | | 0.212 / 0.211 | | |
In the first model, we explore the impact of SAT math scores on freshman GPA. The intercept of 0.62 is the predicted GPA when the SAT math score is zero (a value outside the observed range, so it serves only as a baseline), and it is statistically significant (p < 0.001). The coefficient for SAT math is 0.03, meaning that each one-unit increase in SAT math score is associated with an increase of about 0.03 in freshman GPA. This relationship is highly significant, with a confidence interval of 0.03 to 0.04 and a p-value below 0.001, indicating a clear positive relationship between SAT math scores and freshman GPA. The R-squared value is 0.150 and the adjusted R-squared is 0.149, so SAT math scores alone explain about 15% of the variance in freshman GPA.
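To make the slope concrete, we could feed model_math a couple of hypothetical SAT math scores through predict(); the values 50 and 60 below are illustrative choices, not taken from the data:
# Illustrative predictions from model_math at two hypothetical SAT math scores
predict(model_math, newdata = data.frame(sat_math = c(50, 60)))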
The second model shifts focus to SAT total scores as the sole predictor of freshman GPA. The intercept is 0.00, the predicted GPA when SAT total is zero, and it is not statistically significant (p = 0.990). The coefficient for SAT total is 0.02, indicating that each one-unit increase in SAT total score corresponds to a 0.02 increase in freshman GPA. This relationship is highly significant, with a confidence interval of 0.02 to 0.03 and a p-value below 0.001. The R-squared value is 0.212 and the adjusted R-squared is 0.211, so SAT total scores alone explain about 21% of the variance in freshman GPA.
Comparing the adjusted R-squared values across the two models shows the relative explanatory power of each predictor. The first model's adjusted R-squared of 0.149 indicates that SAT math scores alone explain a meaningful share of the variance in freshman GPA, while the second model's adjusted R-squared of 0.211 shows that SAT total scores explain noticeably more of that variance.
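One way to put this comparison into code is to stack the fit statistics returned by broom's glance() for each model (broom is already loaded above); a minimal sketch:
# Compare fit statistics for both models side by side
fit_stats <- bind_rows(
  glance(model_math) %>% mutate(model = "Model math"),
  glance(model_total) %>% mutate(model = "Model total")
)
fit_stats %>% select(model, r.squared, adj.r.squared, AIC, BIC)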
data("HappyPlanetIndex")
World <- HappyPlanetIndex
model <- lm(HLY ~ GDPperCapita, data = World)
modelsummary(model,
statistic = "conf.int",
coef_map = c("GDPperCapita" = "GDP per Capita"),
gof_map = c("nobs", "r.squared", "adj.r.squared", "aic", "bic"))
| | (1) |
|---|---|
| GDP per Capita | 0.001 |
| | [0.001, 0.001] |
| Num.Obs. | 141 |
| R2 | 0.566 |
| R2 Adj. | 0.563 |
| AIC | 1043.2 |
| BIC | 1052.0 |
tab_model(model)
| | HLY | | |
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 31.18 | 28.98 – 33.38 | <0.001 |
| GDPperCapita | 0.00 | 0.00 – 0.00 | <0.001 |
| Observations | 141 | | |
| R2 / R2 adjusted | 0.566 / 0.563 | | |
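Because GDP per capita is measured in raw units, its slope rounds to 0.00 in the tab_model output even though it is highly significant. One optional fix is to rescale the predictor before fitting; the variable name GDPperCapita_k and the division by 1,000 below are illustrative choices, assuming GDPperCapita is recorded in single currency units:
# Rescale GDP per capita to thousands so the slope is readable (illustrative rescaling)
World <- World %>% mutate(GDPperCapita_k = GDPperCapita / 1000)
model_k <- lm(HLY ~ GDPperCapita_k, data = World)
tab_model(model_k)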
model1 <- lm(Happiness ~ LifeExpectancy, data = World)
model2 <- lm(Happiness ~ Footprint, data = World)
model3 <- lm(Happiness ~ GDPperCapita + HDI + Population, data = World)
models <- list(model1, model2, model3)
modelplot(models, coef_omit = "Intercept") +
labs(x = 'Coefficient Estimate',
y = 'Term',
title = 'Model Coefficients with Confidence Intervals',
caption = 'Comparison of Models 1, 2, and 3') +
theme_minimal()
modelsummary(models,
statistic = "conf.int",
coef_map = c("LifeExpectancy" = "Life Expectancy",
"Footprint" = "Ecological Footprint",
"GDPperCapita" = "GDP per Capita",
"HDI" = "Human Development Index",
"Population" = "Population (millions)"),
gof_map = c("nobs", "r.squared", "adj.r.squared", "aic", "bic"))
| | (1) | (2) | (3) |
|---|---|---|---|
| Life Expectancy | 0.104 | | |
| | [0.092, 0.115] | | |
| Ecological Footprint | | 0.419 | |
| | | [0.336, 0.502] | |
| GDP per Capita | | | 0.000 |
| | | | [0.000, 0.000] |
| Human Development Index | | | 5.778 |
| | | | [4.636, 6.920] |
| Population (millions) | | | 0.001 |
| | | | [0.000, 0.001] |
| Num.Obs. | 143 | 143 | 141 |
| R2 | 0.693 | 0.414 | 0.707 |
| R2 Adj. | 0.691 | 0.410 | 0.700 |
| AIC | 332.7 | 425.0 | 327.6 |
| BIC | 341.6 | 433.9 | 342.3 |
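The GDP per capita coefficient in model 3 displays as 0.000 only because of rounding. Raising the number of decimal places with modelsummary's fmt argument would make it visible; the value 5 below is just an illustrative choice:
# Show more decimal places so very small coefficients are not rounded to zero
modelsummary(models,
             fmt = 5,
             statistic = "conf.int",
             gof_map = c("nobs", "r.squared", "adj.r.squared"))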
load("Violence.RData")
model <- lm(MurderRate ~ Internet + GDP, data = Violence)
modelsummary(model)
| | (1) |
|---|---|
| (Intercept) | 28.984 |
| | (11.930) |
| Internet | 0.463 |
| | (0.438) |
| GDP | -0.001 |
| | (0.001) |
| Num.Obs. | 8 |
| R2 | 0.602 |
| R2 Adj. | 0.443 |
| AIC | 72.1 |
| BIC | 72.4 |
| Log.Lik. | -32.059 |
| RMSE | 13.31 |
tab_model(model)
| | MurderRate | | |
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 28.98 | -1.68 – 59.65 | 0.059 |
| Internet | 0.46 | -0.66 – 1.59 | 0.339 |
| GDP | -0.00 | -0.00 – 0.00 | 0.080 |
| Observations | 8 | | |
| R2 / R2 adjusted | 0.602 / 0.443 | | |
In this model, we explore the impact of internet usage and GDP on the murder rate. The intercept of 28.98 is the predicted murder rate when both internet usage and GDP are zero, though this estimate falls just short of statistical significance at the 0.05 level (p = 0.059).
The coefficient for internet usage is 0.46, meaning that for each unit increase in internet usage, the murder rate increases by 0.46 units. However, this relationship is not statistically significant, with a confidence interval of -0.66 to 1.59 and a p-value of 0.339, suggesting a weak or negligible relationship between internet usage and the murder rate.
The coefficient for GDP is approximately -0.001, indicating that each additional unit of GDP is associated with a very small decrease in the murder rate. This relationship falls just short of statistical significance, with a confidence interval that narrowly includes zero and a p-value of 0.080, suggesting a possible but very small negative relationship between GDP and the murder rate.
The model is based on only 8 observations. The R-squared value of 0.602 indicates that internet usage and GDP together account for roughly 60% of the variance in the murder rate in this sample, although the adjusted R-squared of 0.443 and the very small sample size mean these estimates should be interpreted with considerable caution.
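Given the small sample, it may also help to inspect the full-precision confidence intervals rather than the rounded values in the table above; confint() reports them directly (an optional check, output not shown):
# Full-precision 95% confidence intervals for the Violence model
confint(model)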