library(dplyr)
library(dslabs)
library(ISLR2)
library(matlib)
library(wooldridge)
# Load the `bwght` dataset by name from the attached packages
# (presumably wooldridge -- confirm).
data(list = "bwght")
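The coefficient on log(dist) is 0.312: a one-unit increase in log(dist) is associated with a 0.312-unit increase in log(price). Because both variables are in logs, this is an elasticity, so a 1% increase in the distance from the recently built garbage incinerator is associated with an increase of about 0.312% in the house price. In other words, the farther a house is from the incinerator, the higher its price.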
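A city will usually choose to put a garbage incinerator far away from the city, so the distance from the incinerator is likely correlated with other determinants of house prices that are left in the error term. The estimate from this simple regression therefore seems to be biased.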
Many factors besides the distance from the garbage incinerator affect house prices, for example the distance to schools, the distance to hospitals, or the local tax rate.
# Load the wage2 dataset by name, then preview its first six rows.
data(list = "wage2")
head(wage2, n = 6L)
## wage hours IQ KWW educ exper tenure age married black south urban sibs
## 1 769 40 93 35 12 11 2 31 1 0 0 1 1
## 2 808 50 119 41 18 11 16 37 1 0 0 1 1
## 3 825 40 108 46 14 11 9 33 1 0 0 1 1
## 4 650 40 96 32 12 13 7 32 1 0 0 1 4
## 5 562 40 74 27 11 14 5 34 1 0 0 1 10
## 6 1400 40 116 43 16 14 2 35 1 1 0 1 1
## brthord meduc feduc lwage
## 1 2 8 8 6.645091
## 2 NA 14 14 6.694562
## 3 2 14 14 6.715384
## 4 3 12 12 6.476973
## 5 6 6 11 6.331502
## 6 2 8 NA 7.244227
# Work on a copy of wage2. The name `data` is also the name of the data()
# loader function; it is kept unchanged because the statements below use it.
data <- wage2
# NOTE(review): attach() is discouraged -- bare column names found through the
# search path can be silently masked by same-named objects in the global
# environment. It is kept only for any code that still relies on bare column
# names; prefer `data$col` or a `data =` argument instead.
attach(data)
# Distribution of wage, read explicitly from the data frame.
summary(data$wage)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 115.0 669.0 905.0 957.9 1160.0 3078.0
# Distribution of IQ.
summary(data$IQ)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 50.0 92.0 102.0 101.3 112.0 145.0
# Sample standard deviation of IQ.
sd(data$IQ)
## [1] 15.05264
# Level-level model: regress wage on IQ. The data frame is passed explicitly
# so the fit does not depend on attach() or on the state of the search path.
wage_iq <- lm(wage ~ IQ, data = data)
summary(wage_iq)
##
## Call:
## lm(formula = wage ~ IQ, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -898.7 -256.5 -47.3 201.1 2072.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 116.9916 85.6415 1.366 0.172
## IQ 8.3031 0.8364 9.927 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 384.8 on 933 degrees of freedom
## Multiple R-squared: 0.09554, Adjusted R-squared: 0.09457
## F-statistic: 98.55 on 1 and 933 DF, p-value: < 2.2e-16
# Log-level model: regress log(wage) on IQ, so the IQ coefficient is the
# approximate proportional change in wage per additional IQ point.
# The data frame is passed explicitly instead of relying on attach().
logwage_iq <- lm(log(wage) ~ IQ, data = data)
summary(logwage_iq)
##
## Call:
## lm(formula = log(wage) ~ IQ, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.09324 -0.25547 0.02261 0.27544 1.21486
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.8869943 0.0890206 66.13 <2e-16 ***
## IQ 0.0088072 0.0008694 10.13 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3999 on 933 degrees of freedom
## Multiple R-squared: 0.09909, Adjusted R-squared: 0.09813
## F-statistic: 102.6 on 1 and 933 DF, p-value: < 2.2e-16
# Auxiliary regression of IQ on educ, showing how the two regressors used in
# the wage models move together.
# The data frame is passed explicitly instead of relying on attach().
iq_educ <- lm(IQ ~ educ, data = data)
summary(iq_educ)
##
## Call:
## lm(formula = IQ ~ educ, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -50.228 -7.262 0.907 8.772 37.373
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 53.6872 2.6229 20.47 <2e-16 ***
## educ 3.5338 0.1922 18.39 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.9 on 933 degrees of freedom
## Multiple R-squared: 0.2659, Adjusted R-squared: 0.2652
## F-statistic: 338 on 1 and 933 DF, p-value: < 2.2e-16
# Log-level model: regress log(wage) on educ alone (IQ omitted).
# The data frame is passed explicitly instead of relying on attach().
logwage_edu <- lm(log(wage) ~ educ, data = data)
summary(logwage_edu)
##
## Call:
## lm(formula = log(wage) ~ educ, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.94620 -0.24832 0.03507 0.27440 1.28106
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.973062 0.081374 73.40 <2e-16 ***
## educ 0.059839 0.005963 10.04 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4003 on 933 degrees of freedom
## Multiple R-squared: 0.09742, Adjusted R-squared: 0.09645
## F-statistic: 100.7 on 1 and 933 DF, p-value: < 2.2e-16
# Log-level model with both regressors: log(wage) on IQ and educ, so each
# coefficient is estimated holding the other variable fixed.
# The data frame is passed explicitly instead of relying on attach().
logwage_edu_iq <- lm(log(wage) ~ IQ + educ, data = data)
summary(logwage_edu_iq)
##
## Call:
## lm(formula = log(wage) ~ IQ + educ, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.01601 -0.24367 0.03359 0.27960 1.23783
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.6582876 0.0962408 58.793 < 2e-16 ***
## IQ 0.0058631 0.0009979 5.875 5.87e-09 ***
## educ 0.0391199 0.0068382 5.721 1.43e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3933 on 932 degrees of freedom
## Multiple R-squared: 0.1297, Adjusted R-squared: 0.1278
## F-statistic: 69.42 on 2 and 932 DF, p-value: < 2.2e-16