# Clear the workspace
  rm(list = ls()) # Clear all files from your environment
# gc()            # Clear unused memory
  cat("\f")       # Clear the console
  graphics.off()  # Clear all graphs

1 Raw Data

x1 <- c(10,8,13,9,11,14,6,4,12,7,5)
x2 <- x1
x3 <- x1 
x4 <- c(rep(x = 8, times=7), 19, rep(x = 8, times=3 ))

y1 <- c(8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68)
y2 <- c(9.14, 8.14, 8.74, 8.77, 9.26, 8.1, 6.13, 3.1, 9.13, 7.26, 4.74)
y3 <- c(7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42,5.73)
y4 <- c(6.58, 5.76, 7.71, 8.84, 8.47, 7.04, 5.25, 12.5, 5.56, 7.91, 6.89)

df <- as.data.frame(cbind(y1,y2,y3,y4,x1,x2,x3,x4))

2 Linear Regressions

2.1 Run the linear regressions

mod1 <- lm(data = df, 
           formula = y1 ~ x1)

mod2 <- lm(data = df, 
           formula = y2 ~ x2)

mod3 <- lm(data = df, 
           formula = y3 ~ x3)

mod4 <- lm(data = df, 
           formula = y4 ~ x4)

2.2 Present the linear regressions

library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
stargazer(mod1, mod2, mod3, mod4, type = "text")
## 
## ====================================================================
##                                        Dependent variable:          
##                              ---------------------------------------
##                                 y1        y2        y3        y4    
##                                 (1)       (2)       (3)       (4)   
## --------------------------------------------------------------------
## x1                           0.500***                               
##                               (0.118)                               
##                                                                     
## x2                                     0.500***                     
##                                         (0.118)                     
##                                                                     
## x3                                               0.500***           
##                                                   (0.118)           
##                                                                     
## x4                                                         0.500*** 
##                                                             (0.118) 
##                                                                     
## Constant                      3.000**   3.001**   3.002**   3.002** 
##                               (1.125)   (1.125)   (1.124)   (1.124) 
##                                                                     
## --------------------------------------------------------------------
## Observations                    11        11        11        11    
## R2                             0.667     0.666     0.666     0.667  
## Adjusted R2                    0.629     0.629     0.629     0.630  
## Residual Std. Error (df = 9)   1.237     1.237     1.236     1.236  
## F Statistic (df = 1; 9)      17.990*** 17.966*** 17.972*** 18.003***
## ====================================================================
## Note:                                    *p<0.1; **p<0.05; ***p<0.01

3 Residual Anlaysis

3.1 Model 1

summary(mod1)
## 
## Call:
## lm(formula = y1 ~ x1, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.92127 -0.45577 -0.04136  0.70941  1.83882 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   3.0001     1.1247   2.667  0.02573 * 
## x1            0.5001     0.1179   4.241  0.00217 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.237 on 9 degrees of freedom
## Multiple R-squared:  0.6665, Adjusted R-squared:  0.6295 
## F-statistic: 17.99 on 1 and 9 DF,  p-value: 0.00217
# Create a scatter plot of the raw data
plot(x = x1, 
     y = y1, 
     main = "Scatter Plot with Best-Fit Line", 
     xlab = "X-axis", 
     ylab = "Y-axis", 
     pch = 19)

# Add the best-fit line to the plot
abline(mod1, col = "red")

plot(mod1)

df$fitted_y1 <- fitted(mod1)

3.2 Model 2

summary(mod2)
## 
## Call:
## lm(formula = y2 ~ x2, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.9009 -0.7609  0.1291  0.9491  1.2691 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)    3.001      1.125   2.667  0.02576 * 
## x2             0.500      0.118   4.239  0.00218 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.237 on 9 degrees of freedom
## Multiple R-squared:  0.6662, Adjusted R-squared:  0.6292 
## F-statistic: 17.97 on 1 and 9 DF,  p-value: 0.002179
# Create a scatter plot of the raw data
plot(x = x2, y = y2, 
     main = "Scatter Plot with Best-Fit Line", 
     xlab = "X-axis", 
     ylab = "Y-axis", 
     pch = 19)

# Add the best-fit line to the plot
abline(mod2, col = "red")

plot(mod2)

df$fitted_y2 <- fitted(mod2)

Try a different specification:

3.3 Model 3

summary(mod3)
## 
## Call:
## lm(formula = y3 ~ x3, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1586 -0.6146 -0.2303  0.1540  3.2411 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   3.0025     1.1245   2.670  0.02562 * 
## x3            0.4997     0.1179   4.239  0.00218 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.236 on 9 degrees of freedom
## Multiple R-squared:  0.6663, Adjusted R-squared:  0.6292 
## F-statistic: 17.97 on 1 and 9 DF,  p-value: 0.002176
# Create a scatter plot of the raw data
plot(x = x3, y = y3, 
     main = "Scatter Plot with Best-Fit Line", 
     xlab = "X-axis", 
     ylab = "Y-axis", 
     pch = 19)

# Add the best-fit line to the plot
abline(mod3, col = "red")

plot(mod3)

df$fitted_y3 <- fitted(mod3)

3.4 Model 4

summary(mod4)
## 
## Call:
## lm(formula = y4 ~ x4, data = df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -1.751 -0.831  0.000  0.809  1.839 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   3.0017     1.1239   2.671  0.02559 * 
## x4            0.4999     0.1178   4.243  0.00216 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.236 on 9 degrees of freedom
## Multiple R-squared:  0.6667, Adjusted R-squared:  0.6297 
## F-statistic:    18 on 1 and 9 DF,  p-value: 0.002165
# Create a scatter plot of the raw data
plot(x = x4, y = y4, 
     main = "Scatter Plot with Best-Fit Line", 
     xlab = "X-axis", 
     ylab = "Y-axis", 
     pch = 19)

# Add the best-fit line to the plot
abline(mod4, col = "red")

plot(mod4)
## Warning: not plotting observations with leverage one:
##   8

df$fitted_y4 <- fitted(mod4)