# Clear the workspace
  rm(list = ls())  # Clear environment
  gc()             # Clear unused memory
##          used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 531021 28.4    1182564 63.2         NA   669277 35.8
## Vcells 974118  7.5    8388608 64.0      16384  1840364 14.1
  cat("\f")        # Clear the console
  if(!is.null(dev.list())) dev.off() # Clear all plots
## null device 
##           1

1 Data

x <- 1:15
y <- c(59, 50, 44, 38, 33, 28, 23, 20, 17, 15, 13, 12, 11, 10, 9.5)

plot(x = x, y = y)

2 Non-transformed regression

reg1 <- lm(formula = y ~ x)

summary(reg1)
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.884 -4.000 -1.036  3.308  9.812 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  52.5714     2.7838   18.89 7.82e-11 ***
## x            -3.3839     0.3062  -11.05 5.57e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.123 on 13 degrees of freedom
## Multiple R-squared:  0.9038, Adjusted R-squared:  0.8964 
## F-statistic: 122.2 on 1 and 13 DF,  p-value: 5.57e-08
plot(reg1)

3 Transformed regression

3.1 log(y)

plot(x = x, y = log(y))

reg2 <- lm(formula = log(y) ~ x)

summary(reg2)
## 
## Call:
## lm(formula = log(y) ~ x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.09085 -0.06377  0.02138  0.03429  0.14854 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.15602    0.03828  108.58  < 2e-16 ***
## x           -0.13689    0.00421  -32.52 7.72e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07044 on 13 degrees of freedom
## Multiple R-squared:  0.9879, Adjusted R-squared:  0.9869 
## F-statistic:  1057 on 1 and 13 DF,  p-value: 7.723e-14
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
stargazer(reg1, reg2, type="text")
## 
## ==========================================================
##                                   Dependent variable:     
##                               ----------------------------
##                                     y           log(y)    
##                                    (1)            (2)     
## ----------------------------------------------------------
## x                               -3.384***      -0.137***  
##                                  (0.306)        (0.004)   
##                                                           
## Constant                        52.571***      4.156***   
##                                  (2.784)        (0.038)   
##                                                           
## ----------------------------------------------------------
## Observations                        15            15      
## R2                                0.904          0.988    
## Adjusted R2                       0.896          0.987    
## Residual Std. Error (df = 13)     5.123          0.070    
## F Statistic (df = 1; 13)        122.152***   1,057.292*** 
## ==========================================================
## Note:                          *p<0.1; **p<0.05; ***p<0.01

3.2 log(x)

reg3 <- lm(formula = y ~ log(x))
summary(reg3)
## 
## Call:
## lm(formula = y ~ log(x))
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.069 -1.313 -0.260  1.127  3.122 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  63.0686     1.4090   44.76 1.25e-15 ***
## log(x)      -20.1987     0.7019  -28.78 3.70e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.054 on 13 degrees of freedom
## Multiple R-squared:  0.9845, Adjusted R-squared:  0.9834 
## F-statistic: 828.2 on 1 and 13 DF,  p-value: 3.702e-13
plot(reg3)

stargazer(reg1, reg2, reg3, type="text")
## 
## ================================================================
##                                      Dependent variable:        
##                               ----------------------------------
##                                   y         log(y)        y     
##                                  (1)         (2)         (3)    
## ----------------------------------------------------------------
## x                             -3.384***   -0.137***             
##                                (0.306)     (0.004)              
##                                                                 
## log(x)                                                -20.199***
##                                                        (0.702)  
##                                                                 
## Constant                      52.571***    4.156***   63.069*** 
##                                (2.784)     (0.038)     (1.409)  
##                                                                 
## ----------------------------------------------------------------
## Observations                      15          15          15    
## R2                              0.904       0.988       0.985   
## Adjusted R2                     0.896       0.987       0.983   
## Residual Std. Error (df = 13)   5.123       0.070       2.054   
## F Statistic (df = 1; 13)      122.152*** 1,057.292*** 828.180***
## ================================================================
## Note:                                *p<0.1; **p<0.05; ***p<0.01

5 R squared

The coefficient of determination (\(R^2\)) is a measure of the proportion of the variance in the dependent variable that is predictable from the independent variables in a regression model. Comparing \(R^2\) values can be meaningful in certain contexts, but there are important considerations to keep in mind:

  1. Same Model Structure:

    • \(R^2\) values are comparable only when the models share the same structure and the same dependent variable. If the models have different specifications or predict different dependent variables, \(R^2\) comparisons become less meaningful. In the tables above, reg1 and reg3 both model y, so their \(R^2\) values can be compared directly, while reg2 models log(y) and its \(R^2\) is on a different scale (a back-transformation sketch appears below).
  2. Nested Models:

    • When comparing models with different specifications, consider using the adjusted \(R^2\) or conducting formal hypothesis tests for nested models (an anova() sketch appears below). Adjusted \(R^2\) takes the number of predictors into account and penalizes model complexity.
  3. Context-Specific Comparisons:

    • \(R^2\) values are context-specific and depend on the nature of the data and the research question. A higher \(R^2\) is not always better, and the appropriateness of a model should be evaluated based on the specific goals of the analysis.
  4. Interpretability:

    • The \(R^2\) value is a relative measure, and its interpretation depends on the field of study. In some fields, even small \(R^2\) values may be considered substantial, while in others, larger \(R^2\) values might be expected.
  5. Caution with High \(R^2\):

    • Extremely high \(R^2\) values can be a cause for suspicion, especially if they are not theoretically justifiable. Overfitting (capturing noise as if it were a real pattern) can lead to inflated \(R^2\) values, which might not generalize well to new data.
  6. Outliers and Influential Observations:

    • Outliers and influential observations can have a substantial impact on \(R^2\). Comparisons should be made with caution, and it’s important to assess the impact of influential observations on model stability.
  7. Cross-Validation:

    • Consider using cross-validation metrics (e.g., mean squared error, root mean squared error) for model evaluation, especially if your goal is to assess predictive performance on new data, as sketched below.
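
As a minimal illustration of this last point (not part of the original output, and assuming the x and y vectors defined above), the sketch below computes leave-one-out RMSE on the scale of y for the reg1 and reg3 specifications:

loocv_rmse <- function(formula, data) {
  errs <- sapply(seq_len(nrow(data)), function(i) {
    fit <- lm(formula, data = data[-i, ])           # refit without observation i
    data$y[i] - predict(fit, newdata = data[i, ])   # held-out prediction error (assumes the response column is named y)
  })
  sqrt(mean(errs^2))                                # root mean squared error over the held-out points
}

dat <- data.frame(x = x, y = y)
loocv_rmse(y ~ x, dat)       # the reg1 specification
loocv_rmse(y ~ log(x), dat)  # the reg3 specification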

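Relatedly (point 1 above), a rough way to compare reg2 with reg1 and reg3 on a common scale is to back-transform its fitted values from log(y) to y and recompute an \(R^2\)-style statistic there. This is only a sketch: it uses the naive exp() back-transformation and ignores the retransformation (smearing) correction.

yhat2 <- exp(fitted(reg2))                     # log-scale fits mapped back to the y scale
1 - sum((y - yhat2)^2) / sum((y - mean(y))^2)  # pseudo R-squared, comparable with reg1 and reg3
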
In summary, while comparing \(R^2\) values can provide insights into the explanatory power of different models, it should be done cautiously and with attention to the context of the analysis, the nature of the data, and the goals of the research. Adjusted \(R^2\), hypothesis tests, and other model evaluation metrics should also be considered in the comparison process.
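
Finally, as a hedged illustration of the nested-model point (2 above), the sketch below adds a hypothetical quadratic term to the reg1 specification and uses anova() to test whether the extra term improves the fit, with the adjusted \(R^2\) values shown alongside. The quadratic model is not part of the analysis above, only an example of a formal test between nested models.

reg_quad <- lm(y ~ x + I(x^2))                 # hypothetical richer model that nests reg1
anova(reg1, reg_quad)                          # F test for the added quadratic term
c(reg1 = summary(reg1)$adj.r.squared,          # adjusted R-squared, simpler model
  reg_quad = summary(reg_quad)$adj.r.squared)  # adjusted R-squared, richer model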