# Correlation between SAT math scores (sat_math) and freshman GPA (gpa_fy);
# the value is printed so it can be interpreted below.
correlation_math_gpa <- cor(
  x = sat_gpa[["sat_math"]],
  y = sat_gpa[["gpa_fy"]]
)
print(correlation_math_gpa)
## [1] 0.3871178
# Visualize this relationship with a scatterplot, including a regression line.
library(ggplot2)
# Scatterplot of freshman GPA against SAT math score with a fitted linear
# regression line (standard-error band turned off).
ggplot(data = sat_gpa, mapping = aes(x = sat_math, y = gpa_fy)) +
  geom_point(size = 0.5) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(x = "SAT Math Score", y = "Freshman GPA")
## `geom_smooth()` using formula = 'y ~ x'
Interpretation: The correlation coefficient between SAT math
scores and freshman GPA is 0.39. This indicates a moderate positive
correlation: students with higher SAT math scores tend to have higher
freshman GPAs. The scatter plot visually confirms this relationship, and
the upward-sloping regression line shows the positive association
between the two variables.
Create regression model
# Two single-predictor linear models of freshman GPA: one on the total SAT
# score, one on the verbal SAT score.
model_total_gpa <- lm(formula = gpa_fy ~ sat_total, data = sat_gpa)
model_verbal_gpa <- lm(formula = gpa_fy ~ sat_verbal, data = sat_gpa)
Create regression table using modelsummary
library(modelsummary)
# The list names become the column headers of the regression table.
sat_models <- list(Total_SAT = model_total_gpa, Verbal_SAT = model_verbal_gpa)
modelsummary(models = sat_models)
Total_SAT | Verbal_SAT | |
---|---|---|
(Intercept) | 0.002 | 0.701 |
(0.152) | (0.129) | |
sat_total | 0.024 | |
(0.001) | ||
sat_verbal | 0.036 | |
(0.003) | ||
Num.Obs. | 1000 | 1000 |
R2 | 0.212 | 0.161 |
R2 Adj. | 0.211 | 0.160 |
AIC | 2004.8 | 2067.2 |
BIC | 2019.5 | 2081.9 |
Log.Lik. | -999.382 | -1030.580 |
RMSE | 0.66 | 0.68 |
Create regression table using sjPlot package
library(sjPlot)
# Show both SAT models side by side; the table reports estimates, confidence
# intervals and p-values per model.
tab_model(model_total_gpa, model_verbal_gpa)
gpa_fy | gpa_fy | |||||
---|---|---|---|---|---|---|
Predictors | Estimates | CI | p | Estimates | CI | p |
(Intercept) | 0.00 | -0.30 – 0.30 | 0.990 | 0.70 | 0.45 – 0.95 | <0.001 |
sat total | 0.02 | 0.02 – 0.03 | <0.001 | |||
sat verbal | 0.04 | 0.03 – 0.04 | <0.001 | |||
Observations | 1000 | 1000 | ||||
R2 / R2 adjusted | 0.212 / 0.211 | 0.161 / 0.160 |
Interpretation: A 1-point increase in SAT total score is
associated with a 0.024 increase in freshman GPA. The R-squared value
is 0.212, indicating that SAT total score explains about 21.2% of the
variation in freshman GPA. A 1-point increase in SAT verbal score is
associated with a 0.036 increase in freshman GPA. The R-squared value
is 0.161, indicating that SAT verbal score explains about 16.1% of the
variation in freshman GPA.
Data load
# Load the HappyPlanetIndex dataset and refer to it as `World` from here on.
data(list = "HappyPlanetIndex")
World <- HappyPlanetIndex
Analyze the relationship between Happy Life Years (HLY) and GDP
# Linear model of Happy Life Years (HLY) on GDP per capita.
model_hly_gdp <- lm(formula = HLY ~ GDPperCapita, data = World)
Generate regression table using modelsummary package
# Single-model regression table (coefficients with standard errors, fit statistics).
modelsummary(models = model_hly_gdp)
(1) | |
---|---|
(Intercept) | 31.182 |
(1.114) | |
GDPperCapita | 0.001 |
(0.000) | |
Num.Obs. | 141 |
R2 | 0.566 |
R2 Adj. | 0.563 |
AIC | 1043.2 |
BIC | 1052.0 |
Log.Lik. | -518.576 |
RMSE | 9.57 |
Generate regression table using sjPlot package
# Same model in sjPlot's layout. Estimates are shown to two decimals here, so
# the small GDPperCapita slope is displayed as 0.00.
tab_model(model_hly_gdp)
HLY | |||
---|---|---|---|
Predictors | Estimates | CI | p |
(Intercept) | 31.18 | 28.98 – 33.38 | <0.001 |
GDPperCapita | 0.00 | 0.00 – 0.00 | <0.001 |
Observations | 141 | ||
R2 / R2 adjusted | 0.566 / 0.563 |
Interpretation: A 1-unit increase in GDP per capita is
associated with an increase of about 0.001 in Happy Life Years (HLY). The
R-squared value is 0.566, indicating that GDP per capita explains about
56.6% of the variation in HLY.
Fit models
# Three linear models of Happiness:
#   m1 - life expectancy only
#   m2 - ecological footprint only
#   m3 - GDP per capita, HDI and population together
m1 <- lm(formula = Happiness ~ LifeExpectancy, data = World)
m2 <- lm(formula = Happiness ~ Footprint, data = World)
m3 <- lm(formula = Happiness ~ GDPperCapita + HDI + Population, data = World)
Create table using modelsummary
# Bundle the three happiness models under descriptive labels; the labels are
# used as the column headers of the table.
models_world <- setNames(
  list(m1, m2, m3),
  c("LifeExpectancy", "Footprint", "GDP_HDI_Population")
)
modelsummary(models = models_world)
LifeExpectancy | Footprint | GDP_HDI_Population | |
---|---|---|---|
(Intercept) | -1.104 | 4.713 | 1.528 |
(0.399) | (0.150) | (0.357) | |
LifeExpectancy | 0.104 | ||
(0.006) | |||
Footprint | 0.419 | ||
(0.042) | |||
GDPperCapita | 0.000 | ||
(0.000) | |||
HDI | 5.778 | ||
(0.577) | |||
Population | 0.001 | ||
(0.000) | |||
Num.Obs. | 143 | 143 | 141 |
R2 | 0.693 | 0.414 | 0.707 |
R2 Adj. | 0.691 | 0.410 | 0.700 |
AIC | 332.7 | 425.0 | 327.6 |
BIC | 341.6 | 433.9 | 342.3 |
Log.Lik. | -163.359 | -209.524 | -158.779 |
RMSE | 0.76 | 1.05 | 0.75 |
Create table using sjPlot
# All three happiness models side by side with estimates, CIs and p-values.
tab_model(m1, m2, m3)
Happiness | Happiness | Happiness | |||||||
---|---|---|---|---|---|---|---|---|---|
Predictors | Estimates | CI | p | Estimates | CI | p | Estimates | CI | p |
(Intercept) | -1.10 | -1.89 – -0.32 | 0.006 | 4.71 | 4.42 – 5.01 | <0.001 | 1.53 | 0.82 – 2.23 | <0.001 |
LifeExpectancy | 0.10 | 0.09 – 0.11 | <0.001 | ||||||
Footprint | 0.42 | 0.34 – 0.50 | <0.001 | ||||||
GDPperCapita | 0.00 | -0.00 – 0.00 | 0.100 | ||||||
HDI | 5.78 | 4.64 – 6.92 | <0.001 | ||||||
Population | 0.00 | -0.00 – 0.00 | 0.247 | ||||||
Observations | 143 | 143 | 141 | ||||||
R2 / R2 adjusted | 0.693 / 0.691 | 0.414 / 0.410 | 0.707 / 0.700 |
Visualize using modelplot from modelsummary package
# Coefficient plot for the three happiness models; intercept terms are left
# out via coef_omit.
modelplot(models = models_world, coef_omit = "Intercept") +
  labs(
    title = "Model Coefficients with Confidence Intervals",
    x = "Coefficient Estimate",
    y = "Term",
    caption = "Comparison of Models 1, 2, and 3"
  ) +
  theme_minimal()
Visualize using plot_model from sjPlot package
# Plot the coefficients of the multi-predictor model (m3) with value labels
# and p-value indicators switched on.
plot_model(model = m3, show.p = TRUE, show.values = TRUE)
Interpretation:
1) A 1-year increase in life expectancy is
associated with a 0.104 increase in happiness. The R-squared value is
0.693, indicating that life expectancy explains about 69.3% of the
variation in happiness.
2) A 1-unit increase in ecological footprint
is associated with a 0.419 increase in happiness. The R-squared value
is 0.414, indicating that ecological footprint explains about 41.4% of the
variation in happiness.
3) Holding the other predictors constant, a 1-unit increase in GDP per
capita is associated with a near-zero change in happiness (0.000 after
rounding, p = 0.100), a 1-unit increase in HDI is associated with a
5.778 increase in happiness (p < 0.001), and a 1-million increase in
population is associated with a 0.001 increase in happiness
(p = 0.247). Only the HDI coefficient is statistically significant. The
R-squared value is 0.707, indicating that these three variables
together explain about 70.7% of the variation in happiness.
Load data
# Load the Violence .RData file (expected to provide the `Violence` object
# used by the model below). The default location is the original author's
# Downloads folder; set the VIOLENCE_RDATA environment variable to point at
# the file on any other machine instead of editing this script.
violence_path <- Sys.getenv(
  "VIOLENCE_RDATA",
  unset = "C:/Users/kevin/Downloads/Violence.RData"
)
# Fail early with a message that names the path that was tried.
if (!file.exists(violence_path)) {
  stop("Violence.RData not found at: ", violence_path, call. = FALSE)
}
load(violence_path)
Create regression model using internet penetration and GDP
# Linear model of the murder rate on internet penetration and GDP.
model_violence <- lm(formula = MurderRate ~ Internet + GDP, data = Violence)
Generate regression table using modelsummary package
# Single-model regression table (coefficients with standard errors, fit statistics).
modelsummary(models = model_violence)
(1) | |
---|---|
(Intercept) | 28.984 |
(11.930) | |
Internet | 0.463 |
(0.438) | |
GDP | -0.001 |
(0.001) | |
Num.Obs. | 8 |
R2 | 0.602 |
R2 Adj. | 0.443 |
AIC | 72.1 |
BIC | 72.4 |
Log.Lik. | -32.059 |
RMSE | 13.31 |
Generate regression table using sjPlot package
# Same model in sjPlot's layout, adding confidence intervals and p-values.
tab_model(model_violence)
MurderRate | |||
---|---|---|---|
Predictors | Estimates | CI | p |
(Intercept) | 28.98 | -1.68 – 59.65 | 0.059 |
Internet | 0.46 | -0.66 – 1.59 | 0.339 |
GDP | -0.00 | -0.00 – 0.00 | 0.080 |
Observations | 8 | ||
R2 / R2 adjusted | 0.602 / 0.443 |
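Interpretation: Holding GDP constant, a 1-unit increase in internet
penetration is associated with a 0.463 increase in murder rate, and
holding internet penetration constant, a 1-unit increase in GDP is
associated with a 0.001 decrease in murder rate. The R-squared value is
0.602 (adjusted 0.443), indicating that these two variables together
explain about 60.2% of the variation in murder rates. With only 8
observations, neither coefficient is statistically significant at the
5% level (p = 0.339 and p = 0.080), so these estimates should be
interpreted with caution.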