# C1 ----
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(dslabs)
library(ISLR2)
library(matlib)
library(wooldridge)
# Load the `kielmc` data set shipped with the wooldridge package.
data("kielmc")
# Open the spreadsheet viewer only in interactive sessions; View() needs a
# GUI/display and serves no purpose when the script is run in batch or knitted.
if (interactive()) {
  View(kielmc)
}
# NOTE(review): `data` shadows the name of utils::data(), and attach() places
# the columns on the search path, where they can be masked silently. Both are
# kept only so that any statement still referring to `data` or to bare column
# names keeps working; prefer passing `data =` explicitly, as done below.
data <- kielmc
attach(data)
# (i) Simple log-log regression of price on dist: the slope is the elasticity
# of price with respect to dist. `data =` binds the formula to kielmc directly.
model_i <- lm(log(price) ~ log(dist), data = kielmc)
summary(model_i)
##
## Call:
## lm(formula = log(price) ~ log(dist))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.22356 -0.28076 -0.05527 0.27992 1.29332
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.25750 0.47383 17.427 < 2e-16 ***
## log(dist) 0.31722 0.04811 6.594 1.78e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4117 on 319 degrees of freedom
## Multiple R-squared: 0.1199, Adjusted R-squared: 0.1172
## F-statistic: 43.48 on 1 and 319 DF, p-value: 1.779e-10
# log(price) = 8.2575 + 0.31722*log(dist)
# (ii) Extend the log-log model with log(intst), log(area), log(land), rooms,
# baths and age. `data =` binds the formula to kielmc directly, so the fit does
# not depend on which objects happen to be attach()ed or defined globally.
model_ii <- lm(
  log(price) ~ log(dist) + log(intst) + log(area) + log(land) +
    rooms + baths + age,
  data = kielmc
)
summary(model_ii)
##
## Call:
## lm(formula = log(price) ~ log(dist) + log(intst) + log(area) +
## log(land) + rooms + baths + age)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.35838 -0.18220 0.00115 0.20532 0.82180
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.2996586 0.5960546 10.569 < 2e-16 ***
## log(dist) 0.0281887 0.0532130 0.530 0.59667
## log(intst) -0.0437804 0.0424359 -1.032 0.30302
## log(area) 0.5124071 0.0698229 7.339 1.87e-12 ***
## log(land) 0.0782098 0.0337206 2.319 0.02102 *
## rooms 0.0503129 0.0235113 2.140 0.03313 *
## baths 0.1070528 0.0352304 3.039 0.00258 **
## age -0.0035630 0.0005774 -6.171 2.10e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2828 on 313 degrees of freedom
## Multiple R-squared: 0.5925, Adjusted R-squared: 0.5834
## F-statistic: 65.02 on 7 and 313 DF, p-value: < 2.2e-16
# The coefficient on log(dist) is much smaller (0.028 vs. 0.317 in model_i), and the effect of distance is no longer statistically significant (p = 0.597)
# (iii) Same regressors as model_ii plus the square of log(intst), allowing a
# quadratic effect of log(intst) on log(price). `data =` binds the formula to
# kielmc directly instead of resolving columns through the search path.
model_iii <- lm(
  log(price) ~ log(dist) + I(log(intst)^2) + log(intst) + log(area) +
    log(land) + rooms + baths + age,
  data = kielmc
)
summary(model_iii)
##
## Call:
## lm(formula = log(price) ~ log(dist) + I(log(intst)^2) + log(intst) +
## log(area) + log(land) + rooms + baths + age)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.41713 -0.17774 0.01012 0.19298 0.72089
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.7907630 2.2957491 -1.651 0.09970 .
## log(dist) 0.1897589 0.0626908 3.027 0.00268 **
## I(log(intst)^2) -0.1128430 0.0248462 -4.542 7.98e-06 ***
## log(intst) 1.9024997 0.4305113 4.419 1.37e-05 ***
## log(area) 0.5137247 0.0677323 7.585 3.86e-13 ***
## log(land) 0.1068761 0.0333141 3.208 0.00147 **
## rooms 0.0494792 0.0228078 2.169 0.03081 *
## baths 0.0898785 0.0343838 2.614 0.00938 **
## age -0.0035699 0.0005601 -6.373 6.64e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2743 on 312 degrees of freedom
## Multiple R-squared: 0.6178, Adjusted R-squared: 0.608
## F-statistic: 63.04 on 8 and 312 DF, p-value: < 2.2e-16
# (iv) Same regressors as model_iii plus the square of log(dist), so both
# log(dist) and log(intst) enter as quadratics. `data =` binds the formula to
# kielmc directly instead of resolving columns through the search path.
model_iv <- lm(
  log(price) ~ I(log(dist)^2) + log(dist) + I(log(intst)^2) + log(intst) +
    log(area) + log(land) + rooms + baths + age,
  data = kielmc
)
summary(model_iv)
##
## Call:
## lm(formula = log(price) ~ I(log(dist)^2) + log(dist) + I(log(intst)^2) +
## log(intst) + log(area) + log(land) + rooms + baths + age)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.42169 -0.17727 -0.00092 0.19645 0.71832
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.110e+01 7.006e+00 -1.585 0.11399
## I(log(dist)^2) -1.026e-01 9.287e-02 -1.105 0.27007
## log(dist) 2.110e+00 1.739e+00 1.213 0.22595
## I(log(intst)^2) -8.888e-02 3.298e-02 -2.695 0.00742 **
## log(intst) 1.520e+00 5.521e-01 2.754 0.00624 **
## log(area) 5.062e-01 6.805e-02 7.439 9.94e-13 ***
## log(land) 9.694e-02 3.449e-02 2.810 0.00526 **
## rooms 4.776e-02 2.285e-02 2.090 0.03746 *
## baths 8.938e-02 3.437e-02 2.600 0.00976 **
## age -3.523e-03 5.615e-04 -6.274 1.17e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2743 on 311 degrees of freedom
## Multiple R-squared: 0.6193, Adjusted R-squared: 0.6083
## F-statistic: 56.21 on 9 and 311 DF, p-value: < 2.2e-16
# log(dist) is no longer individually significant (p = 0.226) once log(dist)^2 is added to the formula, and the squared term itself is insignificant (p = 0.270)
# C2 ----
# Load the `wage1` data set shipped with the wooldridge package.
data("wage1")
# NOTE(review): attach() is kept only so that any statement still referring to
# bare wage1 column names keeps working; attached columns can be masked
# silently, so prefer passing `data =` explicitly, as done below.
attach(wage1)
# Log-wage equation with a quadratic in exper. `data =` binds the formula to
# wage1 directly instead of resolving columns through the search path.
model1 <- lm(log(wage) ~ educ + exper + I(exper^2), data = wage1)
summary(model1)
##
## Call:
## lm(formula = log(wage) ~ educ + exper + I(exper^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.96387 -0.29375 -0.04009 0.29497 1.30216
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1279975 0.1059323 1.208 0.227
## educ 0.0903658 0.0074680 12.100 < 2e-16 ***
## exper 0.0410089 0.0051965 7.892 1.77e-14 ***
## I(exper^2) -0.0007136 0.0001158 -6.164 1.42e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4459 on 522 degrees of freedom
## Multiple R-squared: 0.3003, Adjusted R-squared: 0.2963
## F-statistic: 74.67 on 3 and 522 DF, p-value: < 2.2e-16
# log(wage) = 0.1280 + 0.0904*educ + 0.0410*exper - 0.000714*exper^2