library(wooldridge)
## Warning: package 'wooldridge' was built under R version 4.2.3
# Load the kielmc data set shipped with the wooldridge package and keep only
# the observations from 1981 for the Question C1 regressions.
data <- wooldridge::kielmc
data1 <- subset(data, year == 1981)
# View() opens a data viewer, which only makes sense in an interactive
# session; guard it so scripted or knitted runs do not depend on a viewer.
if (interactive()) {
  View(data1)
}
#Question C1 #Question i).
# Simple regression of log(price) on log(dist) using the 1981 subset.
# Variables are referenced by bare column name so that they are resolved
# through the `data` argument; writing data1$price inside the formula bypasses
# `data`, which makes predict(newdata = ) silently ignore new data.
# NOTE: with this change the slope is labelled `log(dist)` in the summary
# rather than `log(data1$dist)`; the estimates themselves are unchanged.
model1 <- glm(log(price) ~ log(dist), data = data1)
summary(model1)
##
## Call:
## glm(formula = log(data1$price) ~ log(data1$dist), data = data1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.87318 -0.22657 -0.01985 0.25687 0.95045
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.04716 0.64624 12.452 < 2e-16 ***
## log(data1$dist) 0.36488 0.06576 5.548 1.39e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.125522)
##
## Null deviance: 21.437 on 141 degrees of freedom
## Residual deviance: 17.573 on 140 degrees of freedom
## AIC: 112.28
##
## Number of Fisher Scoring iterations: 2
#Interpretation
#Model: log(price) = b0 + b1*log(dist) + u
#Estimated: log(price) = 8.047 + 0.365*log(dist)
#Both price and dist are in logs, so b1 is an elasticity: a 1% increase in dist is associated with an increase in price of about 0.365%.
#Question ii).
# Multiple regression of log(price) on log(dist) plus the additional controls
# log(intst), log(area), log(land), rooms, baths and age (1981 subset).
# All variables are referenced by bare column name so that they are resolved
# through the `data` argument instead of mixing data1$... with data = data1.
# NOTE: with this change the distance coefficient is labelled `log(dist)` in
# the summary rather than `log(data1$dist)`; the estimates are unchanged.
model2 <- glm(
  log(price) ~ log(dist) + log(intst) + log(area) + log(land) + rooms +
    baths + age,
  data = data1
)
summary(model2)
##
## Call:
## glm(formula = log(data1$price) ~ log(data1$dist) + log(intst) +
## log(area) + log(land) + rooms + baths + age, data = data1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.74072 -0.10669 0.00932 0.11817 0.61387
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.592332 0.641711 11.831 < 2e-16 ***
## log(data1$dist) 0.055389 0.057621 0.961 0.338153
## log(intst) -0.039032 0.051662 -0.756 0.451261
## log(area) 0.319294 0.076418 4.178 5.27e-05 ***
## log(land) 0.076824 0.039505 1.945 0.053908 .
## rooms 0.042528 0.028251 1.505 0.134588
## baths 0.166923 0.041944 3.980 0.000113 ***
## age -0.003567 0.001059 -3.369 0.000985 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.04038828)
##
## Null deviance: 21.437 on 141 degrees of freedom
## Residual deviance: 5.412 on 134 degrees of freedom
## AIC: -42.964
##
## Number of Fisher Scoring iterations: 2
#Interpretation
#Model: log(price) = b0 + b1*log(dist) + b2*log(intst) + b3*log(area) + b4*log(land) + b5*rooms + b6*baths + b7*age + u
#Estimated: log(price) = 7.592 + 0.055*log(dist) - 0.039*log(intst) + 0.319*log(area) + 0.077*log(land) + 0.043*rooms + 0.167*baths - 0.0036*age
#Now there are 7 independent variables in the equation. The area (square footage of the house), the number of bathrooms, and the age of the house are statistically significant for explaining the price of a house. baths and age enter in levels, so their coefficients are semi-elasticities: one additional bathroom is associated with a price increase of about 16.69%, and one additional year of age with a price decrease of about 0.36%. The coefficient on log(dist) falls from 0.365 in the simple regression to 0.055 here and is no longer statistically significant.
#Question iii).
# Model from part ii) plus the square of log(intst).
# The squared term is written as I(log(intst)^2): I() makes `^` plain
# arithmetic instead of a formula operator. The earlier term
# log(intst * intst) equals 2 * log(intst), which is perfectly collinear with
# log(intst), so glm() dropped it and reported its coefficient as NA.
# NOTE: any pasted summary output that still shows an NA row for
# log(intst * intst) predates this fix and must be regenerated.
model3 <- glm(
  log(price) ~ log(dist) + log(intst) + log(area) + log(land) + rooms +
    baths + age + I(log(intst)^2),
  data = data1
)
summary(model3)
##
## Call:
## glm(formula = log(data1$price) ~ log(data1$dist) + log(intst) +
## log(area) + log(land) + rooms + baths + age + log(intst *
## intst), data = data1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.74072 -0.10669 0.00932 0.11817 0.61387
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.592332 0.641711 11.831 < 2e-16 ***
## log(data1$dist) 0.055389 0.057621 0.961 0.338153
## log(intst) -0.039032 0.051662 -0.756 0.451261
## log(area) 0.319294 0.076418 4.178 5.27e-05 ***
## log(land) 0.076824 0.039505 1.945 0.053908 .
## rooms 0.042528 0.028251 1.505 0.134588
## baths 0.166923 0.041944 3.980 0.000113 ***
## age -0.003567 0.001059 -3.369 0.000985 ***
## log(intst * intst) NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.04038828)
##
## Null deviance: 21.437 on 141 degrees of freedom
## Residual deviance: 5.412 on 134 degrees of freedom
## AIC: -42.964
##
## Number of Fisher Scoring iterations: 2
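#Interpretation
#Model: log(price) = b0 + b1*log(dist) + b2*log(intst) + b3*log(area) + b4*log(land) + b5*rooms + b6*baths + b7*age + b8*[log(intst)]^2 + u
#Reported estimates: log(price) = 6.3 + 0.028*log(dist) - 0.044*log(intst) + 0.51*log(area) + 0.07*log(land) + 0.05*rooms + 0.107*baths - 0.0036*age
#NOTE: these reported estimates do not match the summary output printed above, whose coefficients are identical to model2 and which shows NA for the added term (it was dropped because of singularities). No coefficient for [log(intst)]^2 is reported here, so the estimates should be rechecked against a fit that actually includes the squared term.
#Question iv).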
# Model from part iii) plus the square of log(dist).
# Both squared terms are wrapped in I() so that `^` is evaluated
# arithmetically. In a formula, log(intst) * log(intst) is an interaction of
# a term with itself and collapses to log(intst), so it added nothing, and
# log(dist * dist) equals 2 * log(dist), which is perfectly collinear with
# log(dist) and was dropped (coefficient reported as NA).
# NOTE: any pasted summary output that still shows an NA row for
# log(dist * dist) predates this fix and must be regenerated.
model4 <- glm(
  log(price) ~ log(dist) + log(intst) + log(area) + log(land) + rooms +
    baths + age + I(log(intst)^2) + I(log(dist)^2),
  data = data1
)
summary(model4)
##
## Call:
## glm(formula = log(data1$price) ~ log(data1$dist) + log(intst) +
## log(area) + log(land) + rooms + baths + age + log(intst) *
## log(intst) + log(dist * dist), data = data1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.74072 -0.10669 0.00932 0.11817 0.61387
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.592332 0.641711 11.831 < 2e-16 ***
## log(data1$dist) 0.055389 0.057621 0.961 0.338153
## log(intst) -0.039032 0.051662 -0.756 0.451261
## log(area) 0.319294 0.076418 4.178 5.27e-05 ***
## log(land) 0.076824 0.039505 1.945 0.053908 .
## rooms 0.042528 0.028251 1.505 0.134588
## baths 0.166923 0.041944 3.980 0.000113 ***
## age -0.003567 0.001059 -3.369 0.000985 ***
## log(dist * dist) NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.04038828)
##
## Null deviance: 21.437 on 141 degrees of freedom
## Residual deviance: 5.412 on 134 degrees of freedom
## AIC: -42.964
##
## Number of Fisher Scoring iterations: 2
#Question C2
# Question C2 works with the wage1 data set from the wooldridge package.
data2 <- wooldridge::wage1

# Regress log(wage) on educ, exper and expersq (presumably exper squared,
# judging by the name) using glm() with its default family, then print the
# coefficient table.
model5 <- glm(
  formula = log(wage) ~ educ + exper + expersq,
  data = data2
)
summary(model5)
##
## Call:
## glm(formula = log(wage) ~ educ + exper + expersq, data = data2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.96387 -0.29375 -0.04009 0.29497 1.30216
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1279975 0.1059323 1.208 0.227
## educ 0.0903658 0.0074680 12.100 < 2e-16 ***
## exper 0.0410089 0.0051965 7.892 1.77e-14 ***
## expersq -0.0007136 0.0001158 -6.164 1.42e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1988321)
##
## Null deviance: 148.33 on 525 degrees of freedom
## Residual deviance: 103.79 on 522 degrees of freedom
## AIC: 649.06
##
## Number of Fisher Scoring iterations: 2
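#Interpretation
#Model: log(wage) = b0 + b1*educ + b2*exper + b3*expersq + u
#Estimated: log(wage) = 0.128 + 0.0904*educ + 0.0410*exper - 0.0007136*expersq
#educ enters in levels, so b1 is a semi-elasticity: a one-unit increase in educ (one more year of education) is associated with a wage increase of about 9.04%, holding exper fixed.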
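#Question ii).
#The square of exper (expersq) is statistically significant: its t value is -6.164 with a p-value of 1.42e-09, far below the 1% level. The negative sign of the t value only reflects the negative coefficient (diminishing returns to experience); significance depends on the absolute value of t, not on its sign.
#Question iii).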
# Same log(wage) regression as model5 but without the expersq term, so exper
# enters only linearly; fitted on data2 with glm()'s default family.
model6 <- glm(
  formula = log(wage) ~ educ + exper,
  data = data2
)
summary(model6)
##
## Call:
## glm(formula = log(wage) ~ educ + exper, data = data2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.05800 -0.30136 -0.04539 0.30601 1.44425
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.216854 0.108595 1.997 0.0464 *
## educ 0.097936 0.007622 12.848 < 2e-16 ***
## exper 0.010347 0.001555 6.653 7.24e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.2128962)
##
## Null deviance: 148.33 on 525 degrees of freedom
## Residual deviance: 111.34 on 523 degrees of freedom
## AIC: 684.02
##
## Number of Fisher Scoring iterations: 2