\(~\)
\(~\)
# Load the housing data (whitespace-delimited text file with a header row).
data <- read.table("./Week 7 - Test.txt", header = TRUE)

# Baseline model: sale price in levels on all house characteristics.
# The `reg` term in the formula is looked up in `data` first, so it refers to
# the data column and not to the model object being assigned here.
reg <- lm(
  sell ~ lot + bdms + fb + sty + drv + rec + ffin + ghw + ca + gar + reg,
  data = data
)
summary(reg)
##
## Call:
## lm(formula = sell ~ lot + bdms + fb + sty + drv + rec + ffin +
## ghw + ca + gar + reg, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -41389 -9307 -591 7353 74875
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4038.3504 3409.4713 -1.184 0.236762
## lot 3.5463 0.3503 10.124 < 2e-16 ***
## bdms 1832.0035 1047.0002 1.750 0.080733 .
## fb 14335.5585 1489.9209 9.622 < 2e-16 ***
## sty 6556.9457 925.2899 7.086 4.37e-12 ***
## drv 6687.7789 2045.2458 3.270 0.001145 **
## rec 4511.2838 1899.9577 2.374 0.017929 *
## ffin 5452.3855 1588.0239 3.433 0.000642 ***
## ghw 12831.4063 3217.5971 3.988 7.60e-05 ***
## ca 12632.8904 1555.0211 8.124 3.15e-15 ***
## gar 4244.8290 840.5442 5.050 6.07e-07 ***
## reg 9369.5132 1669.0907 5.614 3.19e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15420 on 534 degrees of freedom
## Multiple R-squared: 0.6731, Adjusted R-squared: 0.6664
## F-statistic: 99.97 on 11 and 534 DF, p-value: < 2.2e-16
# Diagnostics for the levels model: Jarque-Bera on the residuals and
# Ramsey RESET using the squared fitted values.
jarque <- jarque.bera.test(reg$residuals)
reset <- resettest(reg, power = 2, type = "fitted")

# Show both statistics and p-values side by side in fixed notation.
format(
  data.frame(
    "Ramsey RESET" = c(reset$statistic, reset$p.value),
    "Jarque-Bera" = c(jarque$statistic, jarque$p.value),
    row.names = c("Statistic", "P-Value")
  ),
  scientific = FALSE
)
## Ramsey.RESET Jarque.Bera
## Statistic 26.986035203525 247.6198
## P-Value 0.000000292211 0.0000
\(~\)
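With large test statistics and p-values close to 0, both tests reject their null hypotheses: the RESET test rejects the null that the linear model is correctly specified, and the Jarque-Bera test rejects the null that the residuals are normally distributed. The linear model is therefore not a good fit for the data.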
\(~\)
\(~\)
# Same regressors as the baseline model, with log(sell) as the dependent
# variable.
reg2 <- lm(
  log(sell) ~ lot + bdms + fb + sty + drv + rec + ffin + ghw + ca + gar + reg,
  data = data
)
summary(reg2)
##
## Call:
## lm(formula = log(sell) ~ lot + bdms + fb + sty + drv + rec +
## ffin + ghw + ca + gar + reg, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.67865 -0.12211 0.01666 0.12868 0.67737
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.003e+01 4.724e-02 212.210 < 2e-16 ***
## lot 5.057e-05 4.854e-06 10.418 < 2e-16 ***
## bdms 3.402e-02 1.451e-02 2.345 0.01939 *
## fb 1.678e-01 2.065e-02 8.126 3.10e-15 ***
## sty 9.227e-02 1.282e-02 7.197 2.10e-12 ***
## drv 1.307e-01 2.834e-02 4.610 5.04e-06 ***
## rec 7.352e-02 2.633e-02 2.792 0.00542 **
## ffin 9.940e-02 2.200e-02 4.517 7.72e-06 ***
## ghw 1.784e-01 4.458e-02 4.000 7.22e-05 ***
## ca 1.780e-01 2.155e-02 8.262 1.14e-15 ***
## gar 5.076e-02 1.165e-02 4.358 1.58e-05 ***
## reg 1.271e-01 2.313e-02 5.496 6.02e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2137 on 534 degrees of freedom
## Multiple R-squared: 0.6766, Adjusted R-squared: 0.6699
## F-statistic: 101.6 on 11 and 534 DF, p-value: < 2.2e-16
# Diagnostics for the log-price model: Jarque-Bera on the residuals and
# Ramsey RESET using the squared fitted values.
jarque2 <- jarque.bera.test(reg2$residuals)
reset2 <- resettest(reg2, power = 2, type = "fitted")

# Show both statistics and p-values side by side in fixed notation.
format(
  data.frame(
    "Ramsey RESET" = c(reset2$statistic, reset2$p.value),
    "Jarque-Bera" = c(jarque2$statistic, jarque2$p.value),
    row.names = c("Statistic", "P-Value")
  ),
  scientific = FALSE
)
## Ramsey.RESET Jarque.Bera
## Statistic 0.2703143 8.44324122
## P-Value 0.6033369 0.01467484
\(~\)
With a p-value above 0.05, the RESET Test fails to reject the null hypothesis, suggesting that the new model is not misspecified. The Jarque-Bera test, on the other hand, still rejects its null hypothesis, indicating that residuals are not normally distributed.
\(~\)
\(~\)
# Log-price model with log(lot) added alongside lot.
reg3 <- lm(
  log(sell) ~ lot + log(lot) + bdms + fb + sty + drv + rec + ffin + ghw + ca +
    gar + reg,
  data = data
)
summary(reg3)
##
## Call:
## lm(formula = log(sell) ~ lot + log(lot) + bdms + fb + sty + drv +
## rec + ffin + ghw + ca + gar + reg, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68573 -0.12380 0.00785 0.12521 0.68112
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.150e+00 6.830e-01 10.469 < 2e-16 ***
## lot -1.490e-05 1.624e-05 -0.918 0.359086
## log(lot) 3.827e-01 9.070e-02 4.219 2.88e-05 ***
## bdms 3.489e-02 1.429e-02 2.442 0.014915 *
## fb 1.659e-01 2.033e-02 8.161 2.40e-15 ***
## sty 9.121e-02 1.263e-02 7.224 1.76e-12 ***
## drv 1.068e-01 2.847e-02 3.752 0.000195 ***
## rec 5.467e-02 2.630e-02 2.078 0.038156 *
## ffin 1.052e-01 2.171e-02 4.848 1.64e-06 ***
## ghw 1.791e-01 4.390e-02 4.079 5.20e-05 ***
## ca 1.643e-01 2.146e-02 7.657 9.01e-14 ***
## gar 4.826e-02 1.148e-02 4.203 3.09e-05 ***
## reg 1.344e-01 2.284e-02 5.884 7.10e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2104 on 533 degrees of freedom
## Multiple R-squared: 0.687, Adjusted R-squared: 0.68
## F-statistic: 97.51 on 12 and 533 DF, p-value: < 2.2e-16
# Diagnostics for the model with both lot and log(lot): Jarque-Bera on the
# residuals and Ramsey RESET using the squared fitted values.
jarque3 <- jarque.bera.test(reg3$residuals)
reset3 <- resettest(reg3, power = 2, type = "fitted")

# Show both statistics and p-values side by side in fixed notation.
format(
  data.frame(
    "Ramsey RESET" = c(reset3$statistic, reset3$p.value),
    "Jarque-Bera" = c(jarque3$statistic, jarque3$p.value),
    row.names = c("Statistic", "P-Value")
  ),
  scientific = FALSE
)
## Ramsey.RESET Jarque.Bera
## Statistic 0.067690 9.364331765
## P-Value 0.794831 0.009258938
\(~\)
While the Jarque-Bera test still rejects the null hypothesis of normally distributed residuals, the RESET test now presents even weaker evidence against its null hypothesis than in the last model (the p-value rises from 0.60 to 0.79), suggesting that including the logarithm of the lot size is beneficial for the model’s specification. In this model, the lot size itself is no longer statistically significant.
\(~\)
\(~\)
# Log-log model in lot size, with log(lot) interacted with every other
# regressor.
reg4 <- lm(
  log(sell) ~ log(lot) + bdms + fb + sty + drv + rec + ffin + ghw + ca + gar +
    reg + log(lot) * bdms + log(lot) * fb + log(lot) * sty + log(lot) * drv +
    log(lot) * rec + log(lot) * ffin + log(lot) * ghw + log(lot) * ca +
    log(lot) * gar + log(lot) * reg,
  data = data
)
summary(reg4)
##
## Call:
## lm(formula = log(sell) ~ log(lot) + bdms + fb + sty + drv + rec +
## ffin + ghw + ca + gar + reg + log(lot) * bdms + log(lot) *
## fb + log(lot) * sty + log(lot) * drv + log(lot) * rec + log(lot) *
## ffin + log(lot) * ghw + log(lot) * ca + log(lot) * gar +
## log(lot) * reg, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68306 -0.11612 0.00591 0.12486 0.65998
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.966499 1.070667 8.375 5.09e-16 ***
## log(lot) 0.152685 0.128294 1.190 0.2345
## bdms 0.019075 0.326700 0.058 0.9535
## fb -0.368234 0.429048 -0.858 0.3911
## sty 0.488885 0.309700 1.579 0.1150
## drv -1.463371 0.717225 -2.040 0.0418 *
## rec 1.673992 0.655919 2.552 0.0110 *
## ffin -0.031844 0.445543 -0.071 0.9430
## ghw -0.505889 0.902733 -0.560 0.5754
## ca -0.340276 0.496041 -0.686 0.4930
## gar 0.401941 0.258646 1.554 0.1208
## reg 0.118484 0.479856 0.247 0.8051
## log(lot):bdms 0.002070 0.038654 0.054 0.9573
## log(lot):fb 0.062037 0.050145 1.237 0.2166
## log(lot):sty -0.046361 0.035942 -1.290 0.1977
## log(lot):drv 0.191542 0.087361 2.193 0.0288 *
## log(lot):rec -0.188462 0.076373 -2.468 0.0139 *
## log(lot):ffin 0.015913 0.052851 0.301 0.7635
## log(lot):ghw 0.081135 0.106929 0.759 0.4483
## log(lot):ca 0.059549 0.058024 1.026 0.3052
## log(lot):gar -0.041359 0.030142 -1.372 0.1706
## log(lot):reg 0.001515 0.055990 0.027 0.9784
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2095 on 524 degrees of freedom
## Multiple R-squared: 0.6951, Adjusted R-squared: 0.6829
## F-statistic: 56.89 on 21 and 524 DF, p-value: < 2.2e-16
\(~\)
The individually significant interactions (at the 5% level) are: log(lot):drv (p-value = 0.0288) and log(lot):rec (p-value = 0.0139).
\(~\)
\(~\)
# Restricted model: keep only the log(lot) interactions with drv and rec.
reg5 <- lm(
  log(sell) ~ log(lot) + bdms + fb + sty + drv + rec + ffin + ghw + ca + gar +
    reg + log(lot) * drv + log(lot) * rec,
  data = data
)

# Overall F-statistics of the full-interaction model and the restricted model.
# The second column refers to the `rec` variable (the label previously read
# "REV").
# NOTE(review): these are each model's overall F-statistic, not a nested
# F-test of the dropped interactions; anova(reg5, reg4) may be what is
# intended here -- confirm.
data.frame(
  "All Interactions" = summary(reg4)$fstatistic[1],
  "Only DRV and REC" = summary(reg5)$fstatistic[1],
  row.names = "F-statistic"
)
## All.Interactions Only.DRV.and.REC
## F-statistic 56.88817 91.78705
\(~\)
With a greater F-statistic, the joint significance of the model including only the ‘drv’ and ‘rec’ interactions is higher than that of the model including interactions with all variables.
\(~\)
\(~\)
# Starting point for backward elimination: the model with all log(lot)
# interactions.
summary(reg4)
##
## Call:
## lm(formula = log(sell) ~ log(lot) + bdms + fb + sty + drv + rec +
## ffin + ghw + ca + gar + reg + log(lot) * bdms + log(lot) *
## fb + log(lot) * sty + log(lot) * drv + log(lot) * rec + log(lot) *
## ffin + log(lot) * ghw + log(lot) * ca + log(lot) * gar +
## log(lot) * reg, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68306 -0.11612 0.00591 0.12486 0.65998
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.966499 1.070667 8.375 5.09e-16 ***
## log(lot) 0.152685 0.128294 1.190 0.2345
## bdms 0.019075 0.326700 0.058 0.9535
## fb -0.368234 0.429048 -0.858 0.3911
## sty 0.488885 0.309700 1.579 0.1150
## drv -1.463371 0.717225 -2.040 0.0418 *
## rec 1.673992 0.655919 2.552 0.0110 *
## ffin -0.031844 0.445543 -0.071 0.9430
## ghw -0.505889 0.902733 -0.560 0.5754
## ca -0.340276 0.496041 -0.686 0.4930
## gar 0.401941 0.258646 1.554 0.1208
## reg 0.118484 0.479856 0.247 0.8051
## log(lot):bdms 0.002070 0.038654 0.054 0.9573
## log(lot):fb 0.062037 0.050145 1.237 0.2166
## log(lot):sty -0.046361 0.035942 -1.290 0.1977
## log(lot):drv 0.191542 0.087361 2.193 0.0288 *
## log(lot):rec -0.188462 0.076373 -2.468 0.0139 *
## log(lot):ffin 0.015913 0.052851 0.301 0.7635
## log(lot):ghw 0.081135 0.106929 0.759 0.4483
## log(lot):ca 0.059549 0.058024 1.026 0.3052
## log(lot):gar -0.041359 0.030142 -1.372 0.1706
## log(lot):reg 0.001515 0.055990 0.027 0.9784
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2095 on 524 degrees of freedom
## Multiple R-squared: 0.6951, Adjusted R-squared: 0.6829
## F-statistic: 56.89 on 21 and 524 DF, p-value: < 2.2e-16
\(~\)
1st eliminated interaction: reg (t-value = 0.027)
\(~\)
# Elimination step 1: refit without the log(lot):reg interaction.
reg6 <- lm(
  log(sell) ~ log(lot) + bdms + fb + sty + drv + rec + ffin + ghw + ca + gar +
    reg + log(lot) * bdms + log(lot) * fb + log(lot) * sty + log(lot) * drv +
    log(lot) * rec + log(lot) * ffin + log(lot) * ghw + log(lot) * ca +
    log(lot) * gar,
  data = data
)
summary(reg6)
##
## Call:
## lm(formula = log(sell) ~ log(lot) + bdms + fb + sty + drv + rec +
## ffin + ghw + ca + gar + reg + log(lot) * bdms + log(lot) *
## fb + log(lot) * sty + log(lot) * drv + log(lot) * rec + log(lot) *
## ffin + log(lot) * ghw + log(lot) * ca + log(lot) * gar, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68292 -0.11619 0.00573 0.12491 0.65976
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.96795 1.06831 8.394 4.37e-16 ***
## log(lot) 0.15252 0.12802 1.191 0.2341
## bdms 0.01949 0.32603 0.060 0.9523
## fb -0.36774 0.42824 -0.859 0.3909
## sty 0.48721 0.30316 1.607 0.1086
## drv -1.46786 0.69713 -2.106 0.0357 *
## rec 1.67468 0.65480 2.558 0.0108 *
## ffin -0.03494 0.43021 -0.081 0.9353
## ghw -0.50427 0.89990 -0.560 0.5755
## ca -0.33954 0.49483 -0.686 0.4929
## gar 0.40234 0.25797 1.560 0.1194
## reg 0.13145 0.02304 5.705 1.94e-08 ***
## log(lot):bdms 0.00202 0.03857 0.052 0.9582
## log(lot):fb 0.06198 0.05005 1.238 0.2161
## log(lot):sty -0.04617 0.03518 -1.312 0.1900
## log(lot):drv 0.19207 0.08504 2.259 0.0243 *
## log(lot):rec -0.18855 0.07623 -2.473 0.0137 *
## log(lot):ffin 0.01629 0.05098 0.319 0.7495
## log(lot):ghw 0.08094 0.10658 0.759 0.4479
## log(lot):ca 0.05946 0.05788 1.027 0.3047
## log(lot):gar -0.04140 0.03007 -1.377 0.1691
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2093 on 525 degrees of freedom
## Multiple R-squared: 0.6951, Adjusted R-squared: 0.6835
## F-statistic: 59.85 on 20 and 525 DF, p-value: < 2.2e-16
\(~\)
2nd eliminated interaction: bdms (t-value = 0.052)
\(~\)
# Elimination step 2: refit without the log(lot):bdms interaction.
reg7 <- lm(
  log(sell) ~ log(lot) + bdms + fb + sty + drv + rec + ffin + ghw + ca + gar +
    reg + log(lot) * fb + log(lot) * sty + log(lot) * drv + log(lot) * rec +
    log(lot) * ffin + log(lot) * ghw + log(lot) * ca + log(lot) * gar,
  data = data
)
summary(reg7)
##
## Call:
## lm(formula = log(sell) ~ log(lot) + bdms + fb + sty + drv + rec +
## ffin + ghw + ca + gar + reg + log(lot) * fb + log(lot) *
## sty + log(lot) * drv + log(lot) * rec + log(lot) * ffin +
## log(lot) * ghw + log(lot) * ca + log(lot) * gar, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68301 -0.11617 0.00574 0.12490 0.66020
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.93486 0.86069 10.381 < 2e-16 ***
## log(lot) 0.15647 0.10329 1.515 0.1304
## bdms 0.03655 0.01459 2.506 0.0125 *
## fb -0.37745 0.38563 -0.979 0.3281
## sty 0.48200 0.28614 1.685 0.0927 .
## drv -1.46253 0.68903 -2.123 0.0343 *
## rec 1.67592 0.65375 2.564 0.0106 *
## ffin -0.03743 0.42717 -0.088 0.9302
## ghw -0.50161 0.89761 -0.559 0.5765
## ca -0.33869 0.49409 -0.685 0.4933
## gar 0.40103 0.25652 1.563 0.1186
## reg 0.13144 0.02302 5.710 1.89e-08 ***
## log(lot):fb 0.06313 0.04494 1.405 0.1607
## log(lot):sty -0.04556 0.03321 -1.372 0.1706
## log(lot):drv 0.19143 0.08408 2.277 0.0232 *
## log(lot):rec -0.18868 0.07612 -2.479 0.0135 *
## log(lot):ffin 0.01658 0.05062 0.328 0.7434
## log(lot):ghw 0.08062 0.10631 0.758 0.4486
## log(lot):ca 0.05935 0.05779 1.027 0.3049
## log(lot):gar -0.04125 0.02989 -1.380 0.1682
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2091 on 526 degrees of freedom
## Multiple R-squared: 0.6951, Adjusted R-squared: 0.6841
## F-statistic: 63.12 on 19 and 526 DF, p-value: < 2.2e-16
\(~\)
3rd eliminated interaction: ffin (t-value = 0.328)
\(~\)
# Elimination step 3: refit without the log(lot):ffin interaction.
reg8 <- lm(
  log(sell) ~ log(lot) + bdms + fb + sty + drv + rec + ffin + ghw + ca + gar +
    reg + log(lot) * fb + log(lot) * sty + log(lot) * drv + log(lot) * rec +
    log(lot) * ghw + log(lot) * ca + log(lot) * gar,
  data = data
)
summary(reg8)
##
## Call:
## lm(formula = log(sell) ~ log(lot) + bdms + fb + sty + drv + rec +
## ffin + ghw + ca + gar + reg + log(lot) * fb + log(lot) *
## sty + log(lot) * drv + log(lot) * rec + log(lot) * ghw +
## log(lot) * ca + log(lot) * gar, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68181 -0.11724 0.00567 0.12594 0.65662
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.87651 0.84134 10.550 < 2e-16 ***
## log(lot) 0.16359 0.10089 1.621 0.1055
## bdms 0.03655 0.01458 2.507 0.0125 *
## fb -0.38191 0.38506 -0.992 0.3217
## sty 0.48851 0.28520 1.713 0.0873 .
## drv -1.45022 0.68742 -2.110 0.0354 *
## rec 1.62140 0.63167 2.567 0.0105 *
## ffin 0.10232 0.02181 4.691 3.47e-06 ***
## ghw -0.51600 0.89578 -0.576 0.5648
## ca -0.35449 0.49131 -0.722 0.4709
## gar 0.40146 0.25629 1.566 0.1179
## reg 0.13227 0.02286 5.786 1.24e-08 ***
## log(lot):fb 0.06360 0.04487 1.417 0.1570
## log(lot):sty -0.04640 0.03308 -1.403 0.1613
## log(lot):drv 0.18991 0.08388 2.264 0.0240 *
## log(lot):rec -0.18218 0.07343 -2.481 0.0134 *
## log(lot):ghw 0.08250 0.10606 0.778 0.4370
## log(lot):ca 0.06123 0.05746 1.066 0.2871
## log(lot):gar -0.04129 0.02987 -1.383 0.1674
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2089 on 527 degrees of freedom
## Multiple R-squared: 0.695, Adjusted R-squared: 0.6846
## F-statistic: 66.73 on 18 and 527 DF, p-value: < 2.2e-16
\(~\)
4th eliminated interaction: ghw (t-value = 0.778)
\(~\)
# Elimination step 4: refit without the log(lot):ghw interaction.
reg9 <- lm(
  log(sell) ~ log(lot) + bdms + fb + sty + drv + rec + ffin + ghw + ca + gar +
    reg + log(lot) * fb + log(lot) * sty + log(lot) * drv + log(lot) * rec +
    log(lot) * ca + log(lot) * gar,
  data = data
)
summary(reg9)
##
## Call:
## lm(formula = log(sell) ~ log(lot) + bdms + fb + sty + drv + rec +
## ffin + ghw + ca + gar + reg + log(lot) * fb + log(lot) *
## sty + log(lot) * drv + log(lot) * rec + log(lot) * ca + log(lot) *
## gar, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68122 -0.11898 0.00738 0.12611 0.65311
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.80857 0.83648 10.530 < 2e-16 ***
## log(lot) 0.17159 0.10033 1.710 0.0878 .
## bdms 0.03661 0.01457 2.513 0.0123 *
## fb -0.37636 0.38485 -0.978 0.3286
## sty 0.49092 0.28508 1.722 0.0857 .
## drv -1.43262 0.68679 -2.086 0.0375 *
## rec 1.63058 0.63133 2.583 0.0101 *
## ffin 0.10361 0.02174 4.766 2.44e-06 ***
## ghw 0.17991 0.04391 4.098 4.83e-05 ***
## ca -0.33972 0.49076 -0.692 0.4891
## gar 0.39730 0.25614 1.551 0.1215
## reg 0.13113 0.02281 5.750 1.51e-08 ***
## log(lot):fb 0.06302 0.04485 1.405 0.1606
## log(lot):sty -0.04669 0.03306 -1.412 0.1585
## log(lot):drv 0.18782 0.08380 2.241 0.0254 *
## log(lot):rec -0.18320 0.07339 -2.496 0.0129 *
## log(lot):ca 0.05932 0.05738 1.034 0.3017
## log(lot):gar -0.04085 0.02985 -1.368 0.1718
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2088 on 528 degrees of freedom
## Multiple R-squared: 0.6947, Adjusted R-squared: 0.6849
## F-statistic: 70.67 on 17 and 528 DF, p-value: < 2.2e-16
\(~\)
5th eliminated interaction: ca (t-value = 1.034)
\(~\)
# Elimination step 5: refit without the log(lot):ca interaction.
reg10 <- lm(
  log(sell) ~ log(lot) + bdms + fb + sty + drv + rec + ffin + ghw + ca + gar +
    reg + log(lot) * fb + log(lot) * sty + log(lot) * drv + log(lot) * rec +
    log(lot) * gar,
  data = data
)
summary(reg10)
##
## Call:
## lm(formula = log(sell) ~ log(lot) + bdms + fb + sty + drv + rec +
## ffin + ghw + ca + gar + reg + log(lot) * fb + log(lot) *
## sty + log(lot) * drv + log(lot) * rec + log(lot) * gar, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.67934 -0.12004 0.00644 0.12660 0.64601
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.78218 0.83615 10.503 < 2e-16 ***
## log(lot) 0.17484 0.10028 1.743 0.0818 .
## bdms 0.03523 0.01451 2.428 0.0155 *
## fb -0.38030 0.38485 -0.988 0.3235
## sty 0.47196 0.28451 1.659 0.0977 .
## drv -1.42861 0.68683 -2.080 0.0380 *
## rec 1.55669 0.62731 2.482 0.0134 *
## ffin 0.10341 0.02174 4.756 2.55e-06 ***
## ghw 0.17721 0.04383 4.043 6.06e-05 ***
## ca 0.16716 0.02121 7.880 1.87e-14 ***
## gar 0.33850 0.24976 1.355 0.1759
## reg 0.13298 0.02274 5.848 8.69e-09 ***
## log(lot):fb 0.06359 0.04485 1.418 0.1569
## log(lot):sty -0.04432 0.03299 -1.344 0.1797
## log(lot):drv 0.18733 0.08381 2.235 0.0258 *
## log(lot):rec -0.17463 0.07293 -2.395 0.0170 *
## log(lot):gar -0.03385 0.02908 -1.164 0.2448
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2088 on 529 degrees of freedom
## Multiple R-squared: 0.6941, Adjusted R-squared: 0.6848
## F-statistic: 75.01 on 16 and 529 DF, p-value: < 2.2e-16
\(~\)
6th eliminated interaction: gar (t-value = -1.164)
\(~\)
# Elimination step 6: refit without the log(lot):gar interaction.
reg11 <- lm(
  log(sell) ~ log(lot) + bdms + fb + sty + drv + rec + ffin + ghw + ca + gar +
    reg + log(lot) * fb + log(lot) * sty + log(lot) * drv + log(lot) * rec,
  data = data
)
summary(reg11)
##
## Call:
## lm(formula = log(sell) ~ log(lot) + bdms + fb + sty + drv + rec +
## ffin + ghw + ca + gar + reg + log(lot) * fb + log(lot) *
## sty + log(lot) * drv + log(lot) * rec, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68420 -0.12071 0.00669 0.12322 0.64513
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.77393 0.83640 10.490 < 2e-16 ***
## log(lot) 0.17584 0.10031 1.753 0.0802 .
## bdms 0.03530 0.01451 2.432 0.0153 *
## fb -0.34021 0.38344 -0.887 0.3753
## sty 0.46819 0.28459 1.645 0.1005
## drv -1.23688 0.66702 -1.854 0.0642 .
## rec 1.51405 0.62645 2.417 0.0160 *
## ffin 0.10279 0.02174 4.727 2.92e-06 ***
## ghw 0.18002 0.04378 4.112 4.55e-05 ***
## ca 0.16697 0.02122 7.869 2.02e-14 ***
## gar 0.04802 0.01143 4.200 3.13e-05 ***
## reg 0.12990 0.02259 5.750 1.51e-08 ***
## log(lot):fb 0.05903 0.04469 1.321 0.1872
## log(lot):sty -0.04392 0.03300 -1.331 0.1837
## log(lot):drv 0.16448 0.08150 2.018 0.0441 *
## log(lot):rec -0.16943 0.07281 -2.327 0.0203 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2089 on 530 degrees of freedom
## Multiple R-squared: 0.6933, Adjusted R-squared: 0.6846
## F-statistic: 79.87 on 15 and 530 DF, p-value: < 2.2e-16
\(~\)
7th eliminated interaction: fb (t-value = 1.321)
\(~\)
# Elimination step 7: refit without the log(lot):fb interaction.
reg12 <- lm(
  log(sell) ~ log(lot) + bdms + fb + sty + drv + rec + ffin + ghw + ca + gar +
    reg + log(lot) * sty + log(lot) * drv + log(lot) * rec,
  data = data
)
summary(reg12)
##
## Call:
## lm(formula = log(sell) ~ log(lot) + bdms + fb + sty + drv + rec +
## ffin + ghw + ca + gar + reg + log(lot) * sty + log(lot) *
## drv + log(lot) * rec, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68209 -0.11831 0.00758 0.12350 0.63856
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.29846 0.75549 10.984 < 2e-16 ***
## log(lot) 0.23088 0.09131 2.529 0.0117 *
## bdms 0.03623 0.01451 2.497 0.0128 *
## fb 0.16549 0.02061 8.030 6.30e-15 ***
## sty 0.38420 0.27758 1.384 0.1669
## drv -1.25462 0.66735 -1.880 0.0607 .
## rec 1.47254 0.62610 2.352 0.0190 *
## ffin 0.10042 0.02168 4.631 4.58e-06 ***
## ghw 0.18093 0.04381 4.130 4.21e-05 ***
## ca 0.16623 0.02123 7.831 2.64e-14 ***
## gar 0.04751 0.01143 4.155 3.79e-05 ***
## reg 0.13126 0.02258 5.812 1.06e-08 ***
## log(lot):sty -0.03402 0.03216 -1.058 0.2906
## log(lot):drv 0.16690 0.08154 2.047 0.0412 *
## log(lot):rec -0.16467 0.07278 -2.263 0.0241 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2091 on 531 degrees of freedom
## Multiple R-squared: 0.6923, Adjusted R-squared: 0.6842
## F-statistic: 85.33 on 14 and 531 DF, p-value: < 2.2e-16
\(~\)
8th eliminated interaction: sty (t-value = -1.058)
\(~\)
# Elimination step 8: refit without the log(lot):sty interaction.
reg13 <- lm(
  log(sell) ~ log(lot) + bdms + fb + sty + drv + rec + ffin + ghw + ca + gar +
    reg + log(lot) * drv + log(lot) * rec,
  data = data
)
summary(reg13)
##
## Call:
## lm(formula = log(sell) ~ log(lot) + bdms + fb + sty + drv + rec +
## ffin + ghw + ca + gar + reg + log(lot) * drv + log(lot) *
## rec, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.67934 -0.12225 0.00849 0.12259 0.65051
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.74189 0.62863 13.906 < 2e-16 ***
## log(lot) 0.17906 0.07707 2.323 0.02053 *
## bdms 0.03881 0.01430 2.714 0.00686 **
## fb 0.16145 0.02025 7.971 9.62e-15 ***
## sty 0.09083 0.01254 7.242 1.56e-12 ***
## drv -1.18996 0.66462 -1.790 0.07395 .
## rec 1.50253 0.62553 2.402 0.01665 *
## ffin 0.10276 0.02157 4.763 2.46e-06 ***
## ghw 0.18448 0.04368 4.223 2.83e-05 ***
## ca 0.16526 0.02121 7.792 3.48e-14 ***
## gar 0.04690 0.01142 4.107 4.65e-05 ***
## reg 0.13260 0.02255 5.880 7.24e-09 ***
## log(lot):drv 0.15943 0.08124 1.962 0.05024 .
## log(lot):rec -0.16826 0.07270 -2.314 0.02103 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2091 on 532 degrees of freedom
## Multiple R-squared: 0.6916, Adjusted R-squared: 0.6841
## F-statistic: 91.79 on 13 and 532 DF, p-value: < 2.2e-16
\(~\)
9th eliminated interaction: drv (t-value = 1.962)
\(~\)
# Elimination step 9: refit without the log(lot):drv interaction, leaving
# log(lot):rec as the only interaction term.
reg14 <- lm(
  log(sell) ~ log(lot) + bdms + fb + sty + drv + rec + ffin + ghw + ca + gar +
    reg + log(lot) * rec,
  data = data
)
summary(reg14)
##
## Call:
## lm(formula = log(sell) ~ log(lot) + bdms + fb + sty + drv + rec +
## ffin + ghw + ca + gar + reg + log(lot) * rec, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68111 -0.12208 0.00593 0.12731 0.66275
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.59071 0.22656 33.505 < 2e-16 ***
## log(lot) 0.32024 0.02770 11.562 < 2e-16 ***
## bdms 0.03842 0.01434 2.680 0.0076 **
## fb 0.16318 0.02029 8.043 5.71e-15 ***
## sty 0.09080 0.01258 7.220 1.80e-12 ***
## drv 0.11312 0.02815 4.018 6.72e-05 ***
## rec 1.44313 0.62646 2.304 0.0216 *
## ffin 0.10450 0.02161 4.835 1.74e-06 ***
## ghw 0.18429 0.04380 4.208 3.03e-05 ***
## ca 0.16593 0.02126 7.804 3.19e-14 ***
## gar 0.04810 0.01144 4.206 3.05e-05 ***
## reg 0.13373 0.02260 5.917 5.89e-09 ***
## log(lot):rec -0.16112 0.07281 -2.213 0.0273 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2096 on 533 degrees of freedom
## Multiple R-squared: 0.6894, Adjusted R-squared: 0.6824
## F-statistic: 98.59 on 12 and 533 DF, p-value: < 2.2e-16
\(~\)
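No insignificant interactions left to eliminate: the remaining interaction, log(lot):rec, is significant at the 5% level (t-value = -2.213).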
\(~\)
\(~\)
Endogeneity from an omitted variable causes overestimation when the omitted variable affects the dependent variable and the included explanatory variable in the same direction, while underestimation happens when the two effects have opposite directions.
The condition of a house tends to increase both sale prices and likelihood of air conditioning, meaning it affects the dependent and independent variables in similar ways. This would cause an overestimation of the effect of air conditioning on sale prices.
\(~\)
\(~\)
# Estimation sample: the first 400 observations.
data_sub <- data[seq_len(400), ]

# Log-log model (no interactions) fitted on the estimation sample only.
reg_sub <- lm(
  log(sell) ~ log(lot) + bdms + fb + sty + drv + rec + ffin + ghw + ca + gar +
    reg,
  data = data_sub
)
summary(reg_sub)
##
## Call:
## lm(formula = log(sell) ~ log(lot) + bdms + fb + sty + drv + rec +
## ffin + ghw + ca + gar + reg, data = data_sub)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.66582 -0.13906 0.00796 0.14694 0.67596
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.67309 0.29240 26.241 < 2e-16 ***
## log(lot) 0.31378 0.03615 8.680 < 2e-16 ***
## bdms 0.03787 0.01744 2.172 0.030469 *
## fb 0.15238 0.02469 6.170 1.71e-09 ***
## sty 0.08824 0.01819 4.850 1.79e-06 ***
## drv 0.08641 0.03141 2.751 0.006216 **
## rec 0.05465 0.03392 1.611 0.107975
## ffin 0.11471 0.02673 4.291 2.25e-05 ***
## ghw 0.19870 0.05301 3.748 0.000205 ***
## ca 0.17763 0.02724 6.521 2.17e-10 ***
## gar 0.05301 0.01480 3.583 0.000383 ***
## reg 0.15116 0.04215 3.586 0.000378 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2238 on 388 degrees of freedom
## Multiple R-squared: 0.6705, Adjusted R-squared: 0.6611
## F-statistic: 71.77 on 11 and 388 DF, p-value: < 2.2e-16
# Hold-out sample: observations 401 to 546, with the log sale price added as
# an extra column.
new <- data[seq(401, 546), ]
new <- data.frame(new, "log.sell" = log(new$sell))

# Out-of-sample predictions of log(sell) from the model fitted on the first
# 400 observations (predict() dispatches to predict.lm for an lm fit).
forecast <- predict(reg_sub, newdata = new)

# Forecasts next to the realised log prices, keyed by observation number.
comparison <- data.frame(
  "Observation" = new$obs,
  "Forecast" = forecast,
  "Actual Data" = new$log.sell
)

# Mean absolute error of the forecasts on the log scale.
data.frame("MAE" = mae(new$log.sell, forecast))
## MAE
## 1 0.1278416
# Plot forecast and actual log sale prices for the hold-out observations.
# Both series are mapped inside aes(): a `y = ...` vector passed outside aes()
# is not used when the y-axis limits are computed and is matched to rows by
# position only, so actual values outside the forecast range could fall off
# the panel.
ggplot(data = comparison, aes(x = Observation)) +
  geom_line(aes(y = Forecast, color = "red")) +
  geom_line(aes(y = Actual.Data, color = "blue")) +
  labs(x = "Observation", y = "Log(sale prices)") +
  ggtitle("Forecast") +
  # The colour values are literal colour names; the identity scale uses them
  # as-is and the labels give them readable legend entries.
  scale_color_identity(
    name = "Legend",
    breaks = c("red", "blue"),
    labels = c("Forecast", "Actual Values"),
    guide = "legend"
  )

# First rows of the forecast/actual comparison table.
head(comparison)
## Observation Forecast Actual.Data
## 401 401 11.51385 11.43496
## 402 402 11.47529 11.23849
## 403 403 11.38201 11.25803
## 404 404 11.18935 11.28978
## 405 405 11.32946 11.28978
## 406 406 11.36018 11.36210