# Clear the workspace
rm(list = ls()) # Clear all files from your environment
# gc() # Clear unused memory
cat("\f") # Clear the console
graphics.off() # Clear all graphs
# Clear the workspace
rm(list = ls()) # Clear all files from your environment
# gc() # Clear unused memory
cat("\f") # Clear the console
graphics.off() # Clear all graphs
x1 <- c(10,8,13,9,11,14,6,4,12,7,5)
x2 <- x1
x3 <- x1
x4 <- c(rep(x = 8, times=7), 19, rep(x = 8, times=3 ))
y1 <- c(8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68)
y2 <- c(9.14, 8.14, 8.74, 8.77, 9.26, 8.1, 6.13, 3.1, 9.13, 7.26, 4.74)
y3 <- c(7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42,5.73)
y4 <- c(6.58, 5.76, 7.71, 8.84, 8.47, 7.04, 5.25, 12.5, 5.56, 7.91, 6.89)
df <- as.data.frame(cbind(y1,y2,y3,y4,x1,x2,x3,x4))
library(stargazer)
Please cite as:
Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
stargazer(mod1, mod2, mod3, mod4, type = "text")
====================================================================
Dependent variable:
---------------------------------------
y1 y2 y3 y4
(1) (2) (3) (4)
--------------------------------------------------------------------
x1 0.500***
(0.118)
x2 0.500***
(0.118)
x3 0.500***
(0.118)
x4 0.500***
(0.118)
Constant 3.000** 3.001** 3.002** 3.002**
(1.125) (1.125) (1.124) (1.124)
--------------------------------------------------------------------
Observations 11 11 11 11
R2 0.667 0.666 0.666 0.667
Adjusted R2 0.629 0.629 0.629 0.630
Residual Std. Error (df = 9) 1.237 1.237 1.236 1.236
F Statistic (df = 1; 9) 17.990*** 17.966*** 17.972*** 18.003***
====================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
summary(mod1)
Call:
lm(formula = y1 ~ x1, data = df)
Residuals:
Min 1Q Median 3Q Max
-1.92127 -0.45577 -0.04136 0.70941 1.83882
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.0001 1.1247 2.667 0.02573 *
x1 0.5001 0.1179 4.241 0.00217 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.237 on 9 degrees of freedom
Multiple R-squared: 0.6665, Adjusted R-squared: 0.6295
F-statistic: 17.99 on 1 and 9 DF, p-value: 0.00217
# Create a scatter plot of the raw data
plot(x = x1,
y = y1,
main = "Scatter Plot with Best-Fit Line",
xlab = "X-axis",
ylab = "Y-axis",
pch = 19)
# Add the best-fit line to the plot
abline(mod1, col = "red")
plot(mod1)
df$fitted_y1 <- fitted(mod1)
summary(mod2)
Call:
lm(formula = y2 ~ x2, data = df)
Residuals:
Min 1Q Median 3Q Max
-1.9009 -0.7609 0.1291 0.9491 1.2691
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.001 1.125 2.667 0.02576 *
x2 0.500 0.118 4.239 0.00218 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.237 on 9 degrees of freedom
Multiple R-squared: 0.6662, Adjusted R-squared: 0.6292
F-statistic: 17.97 on 1 and 9 DF, p-value: 0.002179
# Create a scatter plot of the raw data
plot(x = x2, y = y2,
main = "Scatter Plot with Best-Fit Line",
xlab = "X-axis",
ylab = "Y-axis",
pch = 19)
# Add the best-fit line to the plot
abline(mod2, col = "red")
plot(mod2)
df$fitted_y2 <- fitted(mod2)
Try a different specification:
Call:
lm(formula = y2 ~ x2 + I(x2^2), data = df)
Residuals:
Min 1Q Median 3Q Max
-0.0013287 -0.0011888 -0.0006294 0.0008741 0.0023776
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -5.9957343 0.0043299 -1385 <2e-16 ***
x2 2.7808392 0.0010401 2674 <2e-16 ***
I(x2^2) -0.1267133 0.0000571 -2219 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.001672 on 8 degrees of freedom
Multiple R-squared: 1, Adjusted R-squared: 1
F-statistic: 7.378e+06 on 2 and 8 DF, p-value: < 2.2e-16
# # Create a scatter plot of the raw data
# plot(x = x2, y = y2,
# main = "Scatter Plot with Best-Fit Line",
# xlab = "X-axis",
# ylab = "Y-axis",
# pch = 19)
#
# # Add the best-fit line to the plot
# abline(mod2_modified, col = "red")
plot(mod2_modified)
summary(mod3)
Call:
lm(formula = y3 ~ x3, data = df)
Residuals:
Min 1Q Median 3Q Max
-1.1586 -0.6146 -0.2303 0.1540 3.2411
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.0025 1.1245 2.670 0.02562 *
x3 0.4997 0.1179 4.239 0.00218 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.236 on 9 degrees of freedom
Multiple R-squared: 0.6663, Adjusted R-squared: 0.6292
F-statistic: 17.97 on 1 and 9 DF, p-value: 0.002176
# Create a scatter plot of the raw data
plot(x = x3, y = y3,
main = "Scatter Plot with Best-Fit Line",
xlab = "X-axis",
ylab = "Y-axis",
pch = 19)
# Add the best-fit line to the plot
abline(mod3, col = "red")
plot(mod3)
df$fitted_y3 <- fitted(mod3)
summary(mod4)
Call:
lm(formula = y4 ~ x4, data = df)
Residuals:
Min 1Q Median 3Q Max
-1.751 -0.831 0.000 0.809 1.839
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.0017 1.1239 2.671 0.02559 *
x4 0.4999 0.1178 4.243 0.00216 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.236 on 9 degrees of freedom
Multiple R-squared: 0.6667, Adjusted R-squared: 0.6297
F-statistic: 18 on 1 and 9 DF, p-value: 0.002165
# Create a scatter plot of the raw data
plot(x = x4, y = y4,
main = "Scatter Plot with Best-Fit Line",
xlab = "X-axis",
ylab = "Y-axis",
pch = 19)
# Add the best-fit line to the plot
abline(mod4, col = "red")
plot(mod4)
Warning: not plotting observations with leverage one:
8
df$fitted_y4 <- fitted(mod4)
Second, I learned about a simple but powerful way to visualize your residuals from non-base R commands that may be well worth a look.
Video - https://www.youtube.com/watch?v=Bi8sHIo3s1Y
easystats
: Quickly investigate model performance: https://www.r-bloggers.com/2021/07/easystats-quickly-investigate-model-performance/