# List of packages
packages <- c("tidyverse", "fst", "modelsummary", "broom", "sjPlot", "ggplot2", "car", "Lock5Data", "mosaic") # add any you need here
# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
# Load the packages
invisible(lapply(packages, library, character.only = TRUE)) # invisible() suppresses the printed list of attached packages
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
## backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
##
## Revert to `kableExtra` for one session:
##
## options(modelsummary_factory_default = 'kableExtra')
## options(modelsummary_factory_latex = 'kableExtra')
## options(modelsummary_factory_html = 'kableExtra')
##
## Silence this message forever:
##
## config_modelsummary(startup_message = FALSE)
##
## Learn more about sjPlot with 'browseVignettes("sjPlot")'.
##
## Loading required package: carData
##
##
## Attaching package: 'car'
##
##
## The following object is masked from 'package:dplyr':
##
## recode
##
##
## The following object is masked from 'package:purrr':
##
## some
##
##
## Registered S3 method overwritten by 'mosaic':
## method from
## fortify.SpatialPolygonsDataFrame ggplot2
##
##
## The 'mosaic' package masks several functions from core packages in order to add
## additional features. The original behavior of these functions should not be affected by this.
##
##
## Attaching package: 'mosaic'
##
##
## The following object is masked from 'package:Matrix':
##
## mean
##
##
## The following objects are masked from 'package:car':
##
## deltaMethod, logit
##
##
## The following object is masked from 'package:modelsummary':
##
## msummary
##
##
## The following objects are masked from 'package:dplyr':
##
## count, do, tally
##
##
## The following object is masked from 'package:purrr':
##
## cross
##
##
## The following object is masked from 'package:ggplot2':
##
## stat
##
##
## The following objects are masked from 'package:stats':
##
## binom.test, cor, cor.test, cov, fivenum, IQR, median, prop.test,
## quantile, sd, t.test, var
##
##
## The following objects are masked from 'package:base':
##
## max, mean, min, prod, range, sample, sum
sat_gpa <- read_csv("sat_gpa.csv")
## Rows: 1000 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): sex
## dbl (5): sat_verbal, sat_math, sat_total, gpa_hs, gpa_fy
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
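As the message above suggests, we could declare the column types up front so read_csv() skips the guessing step and the message disappears. A minimal sketch of that option, assuming the one character and five numeric columns listed in the specification above:
# Optional: declare column types explicitly when reading the data
sat_gpa <- read_csv("sat_gpa.csv",
                    col_types = cols(sex = col_character(),
                                     .default = col_double()))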
cor(sat_gpa$sat_math, sat_gpa$gpa_fy)
## [1] 0.3871178
The output shows a correlation of approximately 0.39, indicating a weak to moderate positive relationship between SAT math scores and freshman GPA.
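To see how precisely this correlation is estimated, we could also run cor.test(), which reports a 95% confidence interval alongside the point estimate (an optional check; its output is not reproduced here):
# Optional: test the correlation and obtain a 95% confidence interval
cor.test(sat_gpa$sat_math, sat_gpa$gpa_fy)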
ggplot(sat_gpa, aes(x = sat_math, y = gpa_fy)) +
  geom_point(size = 0.5) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(x = "Math SAT Score", y = "Freshman GPA")
## `geom_smooth()` using formula = 'y ~ x'
model_total <- lm(gpa_fy ~ sat_total, data = sat_gpa)
model_math <- lm(gpa_fy ~ sat_math, data = sat_gpa)
models <- list("Model math" = model_math, "Model total" = model_total)
modelsummary(models)
| | Model math | Model total |
|---|---|---|
| (Intercept) | 0.622 | 0.002 |
| | (0.141) | (0.152) |
| sat_math | 0.034 | |
| | (0.003) | |
| sat_total | | 0.024 |
| | | (0.001) |
| Num.Obs. | 1000 | 1000 |
| R2 | 0.150 | 0.212 |
| R2 Adj. | 0.149 | 0.211 |
| AIC | 2080.5 | 2004.8 |
| BIC | 2095.2 | 2019.5 |
| Log.Lik. | -1037.243 | -999.382 |
| RMSE | 0.68 | 0.66 |
tab_model(model_math, model_total, title = "Regression Analysis of SAT Scores and Freshman GPA")
| | gpa_fy | | | gpa_fy | | |
|---|---|---|---|---|---|---|
| Predictors | Estimates | CI | p | Estimates | CI | p |
| (Intercept) | 0.62 | 0.35 – 0.90 | <0.001 | 0.00 | -0.30 – 0.30 | 0.990 |
| sat math | 0.03 | 0.03 – 0.04 | <0.001 | | | |
| sat total | | | | 0.02 | 0.02 – 0.03 | <0.001 |
| Observations | 1000 | | | 1000 | | |
| R2 / R2 adjusted | 0.150 / 0.149 | | | 0.212 / 0.211 | | |
In the first model, we explore the impact of SAT math scores on freshman GPA. The intercept of 0.62 is the predicted GPA when the SAT math score is zero (a value outside the observed range, so it serves only as a baseline), and it is statistically significant (p < 0.001). The coefficient for SAT math is 0.03, meaning that each one-unit increase in SAT math score is associated with an increase of about 0.03 in freshman GPA. This relationship is highly significant, with a confidence interval of 0.03 to 0.04 and a p-value below 0.001, indicating a clear positive relationship between SAT math scores and freshman GPA. The R-squared value is 0.150 and the adjusted R-squared is 0.149, so SAT math scores alone explain about 15% of the variance in freshman GPA.
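To make the slope concrete, we could feed model_math a couple of hypothetical SAT math scores through predict(); the values 50 and 60 below are illustrative choices, not taken from the data:
# Illustrative predictions from model_math at two hypothetical SAT math scores
predict(model_math, newdata = data.frame(sat_math = c(50, 60)))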
The second model shifts focus to SAT total scores as the sole predictor of freshman GPA. The intercept is 0.00, the predicted GPA when SAT total is zero, and it is not statistically significant (p = 0.990). The coefficient for SAT total is 0.02, indicating that each one-unit increase in SAT total score corresponds to a 0.02 increase in freshman GPA. This relationship is highly significant, with a confidence interval of 0.02 to 0.03 and a p-value below 0.001. The R-squared value is 0.212 and the adjusted R-squared is 0.211, so SAT total scores alone explain about 21% of the variance in freshman GPA.
Comparing the adjusted R-squared values across the two models shows the relative explanatory power of each predictor. The first model's adjusted R-squared of 0.149 indicates that SAT math scores alone explain a meaningful share of the variance in freshman GPA, while the second model's adjusted R-squared of 0.211 shows that SAT total scores explain noticeably more of that variance.
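One way to put this comparison into code is to stack the fit statistics returned by broom's glance() for each model (broom is already loaded above); a minimal sketch:
# Compare fit statistics for both models side by side
fit_stats <- bind_rows(
  glance(model_math) %>% mutate(model = "Model math"),
  glance(model_total) %>% mutate(model = "Model total")
)
fit_stats %>% select(model, r.squared, adj.r.squared, AIC, BIC)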
data("HappyPlanetIndex")
World <- HappyPlanetIndex
model <- lm(HLY ~ GDPperCapita, data = World)
modelsummary(model,
statistic = "conf.int",
coef_map = c("GDPperCapita" = "GDP per Capita"),
gof_map = c("nobs", "r.squared", "adj.r.squared", "aic", "bic"))
| | (1) |
|---|---|
| GDP per Capita | 0.001 |
| | [0.001, 0.001] |
| Num.Obs. | 141 |
| R2 | 0.566 |
| R2 Adj. | 0.563 |
| AIC | 1043.2 |
| BIC | 1052.0 |
tab_model(model)
| | HLY | | |
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 31.18 | 28.98 – 33.38 | <0.001 |
| GDPperCapita | 0.00 | 0.00 – 0.00 | <0.001 |
| Observations | 141 | | |
| R2 / R2 adjusted | 0.566 / 0.563 | | |
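Because GDP per capita is measured in raw units, its slope rounds to 0.00 in the tab_model output even though it is highly significant. One optional fix is to rescale the predictor before fitting; the variable name GDPperCapita_k and the division by 1,000 below are illustrative choices, assuming GDPperCapita is recorded in single currency units:
# Rescale GDP per capita to thousands so the slope is readable (illustrative rescaling)
World <- World %>% mutate(GDPperCapita_k = GDPperCapita / 1000)
model_k <- lm(HLY ~ GDPperCapita_k, data = World)
tab_model(model_k)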
model1 <- lm(Happiness ~ LifeExpectancy, data = World)
model2 <- lm(Happiness ~ Footprint, data = World)
model3 <- lm(Happiness ~ GDPperCapita + HDI + Population, data = World)
models <- list(model1, model2, model3)
modelplot(models, coef_omit = "Intercept") +
labs(x = 'Coefficient Estimate',
y = 'Term',
title = 'Model Coefficients with Confidence Intervals',
caption = 'Comparison of Models 1, 2, and 3') +
theme_minimal()
modelsummary(models,
statistic = "conf.int",
coef_map = c("LifeExpectancy" = "Life Expectancy",
"Footprint" = "Ecological Footprint",
"GDPperCapita" = "GDP per Capita",
"HDI" = "Human Development Index",
"Population" = "Population (millions)"),
gof_map = c("nobs", "r.squared", "adj.r.squared", "aic", "bic"))
| | (1) | (2) | (3) |
|---|---|---|---|
| Life Expectancy | 0.104 | | |
| | [0.092, 0.115] | | |
| Ecological Footprint | | 0.419 | |
| | | [0.336, 0.502] | |
| GDP per Capita | | | 0.000 |
| | | | [0.000, 0.000] |
| Human Development Index | | | 5.778 |
| | | | [4.636, 6.920] |
| Population (millions) | | | 0.001 |
| | | | [0.000, 0.001] |
| Num.Obs. | 143 | 143 | 141 |
| R2 | 0.693 | 0.414 | 0.707 |
| R2 Adj. | 0.691 | 0.410 | 0.700 |
| AIC | 332.7 | 425.0 | 327.6 |
| BIC | 341.6 | 433.9 | 342.3 |
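The GDP per capita coefficient in model 3 displays as 0.000 only because of rounding. Raising the number of decimal places with modelsummary's fmt argument would make it visible; the value 5 below is just an illustrative choice:
# Show more decimal places so very small coefficients are not rounded to zero
modelsummary(models,
             fmt = 5,
             statistic = "conf.int",
             gof_map = c("nobs", "r.squared", "adj.r.squared"))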
load("Violence.RData")
model <- lm(MurderRate ~ Internet + GDP, data = Violence)
modelsummary(model)
| | (1) |
|---|---|
| (Intercept) | 28.984 |
| | (11.930) |
| Internet | 0.463 |
| | (0.438) |
| GDP | -0.001 |
| | (0.001) |
| Num.Obs. | 8 |
| R2 | 0.602 |
| R2 Adj. | 0.443 |
| AIC | 72.1 |
| BIC | 72.4 |
| Log.Lik. | -32.059 |
| RMSE | 13.31 |
tab_model(model)
| | MurderRate | | |
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 28.98 | -1.68 – 59.65 | 0.059 |
| Internet | 0.46 | -0.66 – 1.59 | 0.339 |
| GDP | -0.00 | -0.00 – 0.00 | 0.080 |
| Observations | 8 | | |
| R2 / R2 adjusted | 0.602 / 0.443 | | |
In this model, we explore the impact of internet usage and GDP on the murder rate. The intercept of 28.98 is the predicted murder rate when both internet usage and GDP are zero, though this estimate falls just short of statistical significance at the 0.05 level (p = 0.059).
The coefficient for internet usage is 0.46, meaning that for each unit increase in internet usage, the murder rate increases by 0.46 units. However, this relationship is not statistically significant, with a confidence interval of -0.66 to 1.59 and a p-value of 0.339, suggesting a weak or negligible relationship between internet usage and the murder rate.
The coefficient for GDP is approximately -0.001, indicating that each additional unit of GDP is associated with a very small decrease in the murder rate. This relationship falls just short of statistical significance, with a confidence interval that narrowly includes zero and a p-value of 0.080, suggesting a possible but very small negative relationship between GDP and the murder rate.
The model is based on only 8 observations. The R-squared value of 0.602 indicates that internet usage and GDP together account for roughly 60% of the variance in the murder rate in this sample, although the adjusted R-squared of 0.443 and the very small sample size mean these estimates should be interpreted with considerable caution.
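Given the small sample, it may also help to inspect the full-precision confidence intervals rather than the rounded values in the table above; confint() reports them directly (an optional check, output not shown):
# Full-precision 95% confidence intervals for the Violence model
confint(model)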