# Reset the session before the analysis: remove workspace objects, run garbage
# collection, clear the console and close the active graphics device.
# NOTE(review): rm(list = ls()) at the top of a script is generally discouraged;
# it only removes the objects ls() reports and does not unload packages or
# reset options. Running the script in a fresh R session is the safer reset.
rm(list = ls()) # Remove every object that ls() lists in the current environment
gc() # Trigger garbage collection; the memory-usage table it returns is echoed below
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 531021 28.4 1182564 63.2 NA 669277 35.8
## Vcells 974118 7.5 8388608 64.0 16384 1840364 14.1
cat("\f") # Emit a form-feed character; intended to clear the console (e.g. in RStudio)
# NOTE(review): dev.off() closes only the current device. If the intent is to
# close every open device ("all plots"), graphics.off() does that.
if(!is.null(dev.list())) dev.off() # Close the current graphics device if one is open
## null device
## 1
# Sample data: x is the index 1..15 and y is the observed response at each x.
x <- seq_len(15)
y <- c(
  59, 50, 44, 38, 33,
  28, 23, 20, 17, 15,
  13, 12, 11, 10, 9.5
)

# Scatterplot of the raw data.
plot(x, y)

# Model 1 (level-level): ordinary least squares regression of y on x.
reg1 <- lm(y ~ x)
summary(reg1)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.884 -4.000 -1.036 3.308 9.812
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 52.5714 2.7838 18.89 7.82e-11 ***
## x -3.3839 0.3062 -11.05 5.57e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.123 on 13 degrees of freedom
## Multiple R-squared: 0.9038, Adjusted R-squared: 0.8964
## F-statistic: 122.2 on 1 and 13 DF, p-value: 5.57e-08
# Standard lm diagnostic plots for the level-level fit.
plot(reg1)

# Scatterplot of the log-transformed response against x.
plot(x, log(y))

# Model 2 (log-level): ordinary least squares regression of log(y) on x.
reg2 <- lm(log(y) ~ x)
summary(reg2)
##
## Call:
## lm(formula = log(y) ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.09085 -0.06377 0.02138 0.03429 0.14854
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.15602 0.03828 108.58 < 2e-16 ***
## x -0.13689 0.00421 -32.52 7.72e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07044 on 13 degrees of freedom
## Multiple R-squared: 0.9879, Adjusted R-squared: 0.9869
## F-statistic: 1057 on 1 and 13 DF, p-value: 7.723e-14
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Print a side-by-side plain-text regression table for models 1 and 2.
stargazer(reg1, reg2, type = "text")
##
## ==========================================================
## Dependent variable:
## ----------------------------
## y log(y)
## (1) (2)
## ----------------------------------------------------------
## x -3.384*** -0.137***
## (0.306) (0.004)
##
## Constant 52.571*** 4.156***
## (2.784) (0.038)
##
## ----------------------------------------------------------
## Observations 15 15
## R2 0.904 0.988
## Adjusted R2 0.896 0.987
## Residual Std. Error (df = 13) 5.123 0.070
## F Statistic (df = 1; 13) 122.152*** 1,057.292***
## ==========================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Model 3 (level-log): ordinary least squares regression of y on log(x).
reg3 <- lm(y ~ log(x))
summary(reg3)
##
## Call:
## lm(formula = y ~ log(x))
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.069 -1.313 -0.260 1.127 3.122
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 63.0686 1.4090 44.76 1.25e-15 ***
## log(x) -20.1987 0.7019 -28.78 3.70e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.054 on 13 degrees of freedom
## Multiple R-squared: 0.9845, Adjusted R-squared: 0.9834
## F-statistic: 828.2 on 1 and 13 DF, p-value: 3.702e-13
# Standard lm diagnostic plots for the level-log fit.
plot(reg3)

# Print a side-by-side plain-text regression table for all three models.
stargazer(reg1, reg2, reg3, type = "text")
##
## ================================================================
## Dependent variable:
## ----------------------------------
## y log(y) y
## (1) (2) (3)
## ----------------------------------------------------------------
## x -3.384*** -0.137***
## (0.306) (0.004)
##
## log(x) -20.199***
## (0.702)
##
## Constant 52.571*** 4.156*** 63.069***
## (2.784) (0.038) (1.409)
##
## ----------------------------------------------------------------
## Observations 15 15 15
## R2 0.904 0.988 0.985
## Adjusted R2 0.896 0.987 0.983
## Residual Std. Error (df = 13) 5.123 0.070 2.054
## F Statistic (df = 1; 13) 122.152*** 1,057.292*** 828.180***
## ================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
Further reading on why and when to log-transform variables in linear regression models: https://dev.to/rokaandy/logarithmic-transformation-in-linear-regression-models-why-when-3a7c
\(R^2\)
The coefficient of determination (\(R^2\)) is a measure of the proportion of the variance in the dependent variable that is predictable from the independent variables in a regression model. Comparing \(R^2\) values can be meaningful in certain contexts, but there are important considerations to keep in mind:
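Same Model Structure: \(R^2\) values are directly comparable only when the models share the same dependent variable, on the same scale, fitted to the same observations. Above, models (1) and (3) both explain y, so their \(R^2\) values (0.904 vs. 0.985) can be compared; model (2) explains log(y), so its \(R^2\) of 0.988 measures variance explained on the log scale and cannot be compared directly with the other two.
Nested Models: When one model is a special case of another, adding regressors can never lower \(R^2\), so compare adjusted \(R^2\) or use an F-test rather than raw \(R^2\).
Context-Specific Comparisons: What counts as a "good" \(R^2\) depends on the field and the type of data, so judge a value against comparable studies rather than against a universal threshold.
Interpretability: A model with a slightly lower \(R^2\) may still be preferable if its coefficients are easier to interpret; for example, in the log-level model (2) the coefficient on x is approximately the proportional change in y per unit increase in x.
Caution with High \(R^2\): A high \(R^2\) does not show that the functional form is correct or that the relationship is causal; always inspect the residual diagnostics (e.g. plot(reg1)) for systematic patterns.
Outliers and Influential Observations: A few extreme or high-leverage points can raise or lower \(R^2\) substantially, so check leverage and Cook's distance before relying on it.
Cross-Validation: \(R^2\) computed on the estimation sample tends to overstate out-of-sample fit; where possible, compare models by their predictive performance on held-out data.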
In summary, while comparing \(R^2\) values can provide insights into the explanatory power of different models, it should be done cautiously and with attention to the context of the analysis, the nature of the data, and the goals of the research. Adjusted \(R^2\), hypothesis tests, and other model evaluation metrics should also be considered in the comparison process.