Linear Regression

Author

AS

Greek letters are used to write the regression equations.

\[ gpa_i = \beta_0 + \beta_1 hours\_studied_i + \epsilon_i \]

# Sample data: 12 paired observations. Element i of `hours_studied` and
# element i of `gpa` belong to the same unit (see the model equation above).
#
# The former `remove(list = ls())` call was dropped on purpose: it deletes
# every object in the caller's global environment when this code is run
# interactively, and nothing below relies on an empty workspace.
hours_studied <- c(2.5, 4, 2, 0, 3, 2.5, 0.5, 6, 0, 2, 1, 2)
gpa <- c(3.7, 3.9, 3.8, 4, 3.7, 3.6, 1.2, 4, 0, 4, 2, 3.7)

# Sample covariance between hours studied and GPA.
cov(hours_studied, gpa)
[1] 1.331818
# Pearson correlation between hours studied and GPA (cor()'s default method).
cor(hours_studied, gpa)
[1] 0.5842855
library(ggplot2)
# Scatter plot of GPA against hours studied with the least-squares line on top.
# A bare geom_abline() draws the line y = x (intercept 0, slope 1), which is
# unrelated to the data, so the line is produced by geom_smooth() with
# method = "lm" instead. se = FALSE suppresses the confidence ribbon so only
# the fitted line is shown.
ggplot2::ggplot(mapping = ggplot2::aes(x = hours_studied, y = gpa)) +
  ggplot2::geom_point() +
  ggplot2::geom_smooth(method = "lm", formula = y ~ x, se = FALSE)

# Model 1: simple linear regression of GPA on hours studied.
reg1 <- lm(formula = gpa ~ hours_studied)


# With an intercept in the model the residuals sum to zero, up to
# floating-point error; rounding to 15 decimals hides that error.
round(sum(residuals(reg1)), digits = 15)
[1] 0
# Residual sum of squares: square each residual first, then add them up.
sum(residuals(reg1)^2)
[1] 12.71557
# Square of the summed residuals (not the sum of squares): sum first, then
# square. The result is numerically zero, about 1.232595e-32.
sum(residuals(reg1))^2
[1] 1.232595e-32
# Fitted values of reg1 against hours studied, drawn as points joined by a
# line. The y aesthetic keeps the original expression because ggplot2 uses it
# as the axis label.
ggplot2::ggplot(
  mapping = aes(x = hours_studied, y = reg1$fitted.values)
) +
  geom_point() +
  geom_line()

# Coefficient table, residual quantiles and fit statistics for model 1.
summary(object = reg1)

Call:
lm(formula = gpa ~ hours_studied)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.1773 -0.6896  0.2355  0.6479  1.8227 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)   
(Intercept)     2.1773     0.5313   4.098  0.00215 **
hours_studied   0.4499     0.1976   2.277  0.04604 * 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.128 on 10 degrees of freedom
Multiple R-squared:  0.3414,    Adjusted R-squared:  0.2755 
F-statistic: 5.183 on 1 and 10 DF,  p-value: 0.04604
# Sample variance of the regressor.
var(hours_studied)
[1] 2.960227
# Sample covariance of the regressor with the response.
cov(hours_studied, gpa)
[1] 1.331818
# Slope computed by hand: covariance of (x, y) divided by the variance of x.
# The printed value equals the hours_studied coefficient reported for reg1.
beta1 <- cov(hours_studied, gpa) / var(hours_studied)
beta1
[1] 0.449904

\[ \bar y = \beta_0 + \beta_1 \bar x \]

# Intercept computed by hand: the fitted line passes through the point of
# means, so beta0 = y_bar - beta1 * x_bar.
x_bar <- mean(hours_studied)
y_bar <- mean(gpa)

beta0 <- y_bar - x_bar * beta1
beta0
[1] 2.177287

# Number of observations; the extra regressors below have the same length.
length(x = hours_studied)
[1] 12
# Two hand-entered candidate regressors, 12 values each to match gpa.
time_wasted_class <- c(2, 10, 5, 8, 3, 1, 6, 1, 10, 9, 0.3, 4)
time_wasted_AS <- c(3, 5, 8, 2, 4, 6, 4, 5, 7, 1, 2, 4)

# A pure-noise regressor: 12 draws from a normal distribution with mean 3 and
# standard deviation 1. The seed is fixed so that reg4 (and any table built
# from it) is identical on every run; without it the estimates change each
# time the document is rendered.
set.seed(123)
time_wasted_R <- rnorm(n = 12, mean = 3, sd = 1)

# Models 2-4: model 1 plus one additional regressor each.
reg2 <- lm(formula = gpa ~ hours_studied + time_wasted_class)

reg3 <- lm(formula = gpa ~ hours_studied + time_wasted_AS)

reg4 <- lm(formula = gpa ~ hours_studied + time_wasted_R)

library(stargazer)

Please cite as: 
 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer 
# Side-by-side plain-text table of the four GPA models.
stargazer(
  reg1, reg2, reg3, reg4,
  type = "text"
)

=================================================================================================
                                                 Dependent variable:                             
                    -----------------------------------------------------------------------------
                                                         gpa                                     
                            (1)                 (2)               (3)                 (4)        
-------------------------------------------------------------------------------------------------
hours_studied             0.450**             0.449*            0.488**             0.416**      
                          (0.198)             (0.223)           (0.194)             (0.181)      
                                                                                                 
time_wasted_class                             -0.002                                             
                                              (0.106)                                            
                                                                                                 
time_wasted_AS                                                   -0.203                          
                                                                (0.159)                          
                                                                                                 
time_wasted_R                                                                       -0.481       
                                                                                    (0.275)      
                                                                                                 
Constant                  2.177***            2.188**           2.958***           3.279***      
                          (0.531)             (0.889)           (0.802)             (0.795)      
                                                                                                 
-------------------------------------------------------------------------------------------------
Observations                 12                 12                 12                 12         
R2                         0.341               0.341             0.442               0.508       
Adjusted R2                0.276               0.195             0.318               0.399       
Residual Std. Error   1.128 (df = 10)     1.189 (df = 9)     1.094 (df = 9)     1.027 (df = 9)   
F Statistic         5.183** (df = 1; 10) 2.333 (df = 2; 9) 3.562* (df = 2; 9) 4.646** (df = 2; 9)
=================================================================================================
Note:                                                                 *p<0.1; **p<0.05; ***p<0.01
# Transformed responses. gpa contains a zero, so a tiny constant is added
# before taking logs to keep the result finite.
# NOTE(review): that offset maps gpa = 0 to log(1e-8), roughly -18.4, an
# extreme value compared with the other log-GPAs -- confirm this is intended.
sqrt_gpa <- gpa^0.5
ln_gpa <- log(gpa + 0.00000001)


# Models 5 and 6: same regressor as model 1, transformed response.
reg6 <- lm(formula = sqrt_gpa ~ hours_studied)
reg5 <- lm(formula = ln_gpa ~ hours_studied)


library(stargazer)
# Compare the level, log and square-root specifications in one text table.
stargazer(
  reg1, reg5, reg6,
  type = "text"
)

===========================================================
                                   Dependent variable:     
                              -----------------------------
                                 gpa      ln_gpa   sqrt_gpa
                                 (1)        (2)      (3)   
-----------------------------------------------------------
hours_studied                  0.450**     1.371    0.191* 
                               (0.198)    (0.947)  (0.092) 
                                                           
Constant                       2.177***   -3.371   1.270***
                               (0.531)    (2.547)  (0.247) 
                                                           
-----------------------------------------------------------
Observations                      12        12        12   
R2                              0.341      0.173    0.302  
Adjusted R2                     0.276      0.091    0.232  
Residual Std. Error (df = 10)   1.128      5.406    0.524  
F Statistic (df = 1; 10)       5.183**     2.096    4.326* 
===========================================================
Note:                           *p<0.1; **p<0.05; ***p<0.01
# Diagnostic plots for model 1 (dispatches to the plot method for lm objects).
plot(x = reg1)