library(wooldridge)
## Warning: package 'wooldridge' was built under R version 4.2.3
# Load the kielmc data set from the wooldridge package and keep only the
# rows with year == 1981, which are the observations used by the C1 models.
data <- wooldridge::kielmc
data1 <- subset(data, year == 1981)
# View() opens a data viewer window; restrict it to interactive sessions so
# the script can also be run non-interactively (Rscript, knitting) without
# failing or blocking on the viewer.
if (interactive()) {
  View(data1)
}

#Question C1 #Question i).

# Question C1 i): simple regression of log(price) on log(dist), 1981 sample.
# Columns are referenced by bare name so they are looked up through
# `data = data1`. Writing data1$price / data1$dist inside the formula bypasses
# the `data` argument, so predict(model1, newdata = ...) would silently keep
# using the original data1 columns for those terms.
# Note: the slope coefficient is now labelled `log(dist)` instead of
# `log(data1$dist)`; the estimates themselves are unchanged.
model1 <- glm(log(price) ~ log(dist), data = data1)
summary(model1)
## 
## Call:
## glm(formula = log(data1$price) ~ log(data1$dist), data = data1)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.87318  -0.22657  -0.01985   0.25687   0.95045  
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      8.04716    0.64624  12.452  < 2e-16 ***
## log(data1$dist)  0.36488    0.06576   5.548 1.39e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.125522)
## 
##     Null deviance: 21.437  on 141  degrees of freedom
## Residual deviance: 17.573  on 140  degrees of freedom
## AIC: 112.28
## 
## Number of Fisher Scoring iterations: 2

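#Interpretation
# Population model: log(price) = b0 + b1*log(dist) + u
# Fitted equation:  log(price) = 8.047 + 0.365*log(dist)
# Both variables are in logs, so b1 is an elasticity: a 1% increase in dist is associated with about a 0.365% increase in price (not a 36.5% increase).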

#Question ii).

# Question C1 ii): add log(intst), log(area), log(land), rooms, baths and age
# to the simple regression from part i).
# All variables are referenced by bare column name and resolved through
# `data = data1`; the original mixed data1$price / data1$dist with bare names,
# which bypasses the `data` argument for those two terms and breaks
# predict(model2, newdata = ...).
# Note: the coefficient label changes from `log(data1$dist)` to `log(dist)`;
# the estimates are unchanged.
model2 <- glm(
  log(price) ~ log(dist) + log(intst) + log(area) + log(land) +
    rooms + baths + age,
  data = data1
)
summary(model2)
## 
## Call:
## glm(formula = log(data1$price) ~ log(data1$dist) + log(intst) + 
##     log(area) + log(land) + rooms + baths + age, data = data1)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.74072  -0.10669   0.00932   0.11817   0.61387  
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      7.592332   0.641711  11.831  < 2e-16 ***
## log(data1$dist)  0.055389   0.057621   0.961 0.338153    
## log(intst)      -0.039032   0.051662  -0.756 0.451261    
## log(area)        0.319294   0.076418   4.178 5.27e-05 ***
## log(land)        0.076824   0.039505   1.945 0.053908 .  
## rooms            0.042528   0.028251   1.505 0.134588    
## baths            0.166923   0.041944   3.980 0.000113 ***
## age             -0.003567   0.001059  -3.369 0.000985 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.04038828)
## 
##     Null deviance: 21.437  on 141  degrees of freedom
## Residual deviance:  5.412  on 134  degrees of freedom
## AIC: -42.964
## 
## Number of Fisher Scoring iterations: 2

#Interpretation
# Population model: log(price) = b0 + b1*log(dist) + b2*log(intst) + b3*log(area) + b4*log(land) + b5*rooms + b6*baths + b7*age + u
# Fitted equation:  log(price) = 7.592 + 0.055*log(dist) - 0.039*log(intst) + 0.32*log(area) + 0.077*log(land) + 0.043*rooms + 0.167*baths - 0.0036*age
# There are now 7 independent variables in the equation. log(area) (the square footage of the house), the number of bathrooms and the age of the house are statistically significant for the price of a house; log(dist) is no longer significant (t = 0.961) once these characteristics are controlled for.
# baths and age enter in levels while price is in logs, so their coefficients are semi-elasticities: one additional bathroom is associated with roughly a 16.69% higher price, and one additional year of age with roughly a 0.36% lower price, holding the other variables fixed.
#Question iii).

# Question C1 iii): add the square of log(intst) to the part ii) model.
# The quadratic term has to be written I(log(intst)^2). The previous
# log(intst * intst) equals 2 * log(intst), an exact multiple of a regressor
# already in the model, so glm() dropped it ("1 not defined because of
# singularities", NA coefficient) and the fit was identical to part ii).
# Any summary output recorded from that specification is stale and must be
# regenerated by re-running this block.
# Variables are referenced by bare column name so they resolve via `data`.
model3 <- glm(
  log(price) ~ log(dist) + log(intst) + log(area) + log(land) +
    rooms + baths + age + I(log(intst)^2),
  data = data1
)
summary(model3)
## 
## Call:
## glm(formula = log(data1$price) ~ log(data1$dist) + log(intst) + 
##     log(area) + log(land) + rooms + baths + age + log(intst * 
##     intst), data = data1)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.74072  -0.10669   0.00932   0.11817   0.61387  
## 
## Coefficients: (1 not defined because of singularities)
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         7.592332   0.641711  11.831  < 2e-16 ***
## log(data1$dist)     0.055389   0.057621   0.961 0.338153    
## log(intst)         -0.039032   0.051662  -0.756 0.451261    
## log(area)           0.319294   0.076418   4.178 5.27e-05 ***
## log(land)           0.076824   0.039505   1.945 0.053908 .  
## rooms               0.042528   0.028251   1.505 0.134588    
## baths               0.166923   0.041944   3.980 0.000113 ***
## age                -0.003567   0.001059  -3.369 0.000985 ***
## log(intst * intst)        NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.04038828)
## 
##     Null deviance: 21.437  on 141  degrees of freedom
## Residual deviance:  5.412  on 134  degrees of freedom
## AIC: -42.964
## 
## Number of Fisher Scoring iterations: 2

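#Interpretation
# Population model: log(price) = b0 + b1*log(dist) + b2*log(intst) + b3*log(area) + b4*log(land) + b5*rooms + b6*baths + b7*age + b8*(log(intst))^2 + u
# Reported equation: log(price)=6.3+0.028log(dist)-0.044log(intst)+0.51log(area)+0.07log(land)+0.05rooms+0.107baths-0.0036age
# NOTE: these figures do not match the summary output recorded above (intercept 7.592, log(dist) 0.055, log(area) 0.319, ...) and contain no coefficient for the squared term; in that output the squared term is NA because it was dropped for singularity. The equation needs to be re-checked against a fit in which (log(intst))^2 is actually estimated.
#Question iv).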

# Question C1 iv): part ii) regressors plus the squares of log(intst) and
# log(dist) (presumably the intended specification -- confirm against the
# exercise text).
# Both squares are wrapped in I(): inside a formula, log(intst) * log(intst)
# is the crossing operator applied to a term with itself and collapses to the
# single term log(intst), while log(dist * dist) equals 2 * log(dist) and is
# dropped as perfectly collinear (NA coefficient). With the old formula the
# fit was therefore identical to part ii); output recorded from it is stale
# and must be regenerated by re-running this block.
# Variables are referenced by bare column name so they resolve via `data`.
model4 <- glm(
  log(price) ~ log(dist) + log(intst) + log(area) + log(land) +
    rooms + baths + age + I(log(intst)^2) + I(log(dist)^2),
  data = data1
)
summary(model4)
## 
## Call:
## glm(formula = log(data1$price) ~ log(data1$dist) + log(intst) + 
##     log(area) + log(land) + rooms + baths + age + log(intst) * 
##     log(intst) + log(dist * dist), data = data1)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.74072  -0.10669   0.00932   0.11817   0.61387  
## 
## Coefficients: (1 not defined because of singularities)
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       7.592332   0.641711  11.831  < 2e-16 ***
## log(data1$dist)   0.055389   0.057621   0.961 0.338153    
## log(intst)       -0.039032   0.051662  -0.756 0.451261    
## log(area)         0.319294   0.076418   4.178 5.27e-05 ***
## log(land)         0.076824   0.039505   1.945 0.053908 .  
## rooms             0.042528   0.028251   1.505 0.134588    
## baths             0.166923   0.041944   3.980 0.000113 ***
## age              -0.003567   0.001059  -3.369 0.000985 ***
## log(dist * dist)        NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.04038828)
## 
##     Null deviance: 21.437  on 141  degrees of freedom
## Residual deviance:  5.412  on 134  degrees of freedom
## AIC: -42.964
## 
## Number of Fisher Scoring iterations: 2

#Question C2

# Question C2: take the wage1 data set from the wooldridge package.
data2 <- wooldridge::wage1

# Regress log(wage) on educ, exper and expersq (all columns of data2) with
# glm()'s default family, then print the coefficient table.
model5 <- glm(
  formula = log(wage) ~ educ + exper + expersq,
  data = data2
)
summary(model5)
## 
## Call:
## glm(formula = log(wage) ~ educ + exper + expersq, data = data2)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.96387  -0.29375  -0.04009   0.29497   1.30216  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.1279975  0.1059323   1.208    0.227    
## educ         0.0903658  0.0074680  12.100  < 2e-16 ***
## exper        0.0410089  0.0051965   7.892 1.77e-14 ***
## expersq     -0.0007136  0.0001158  -6.164 1.42e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1988321)
## 
##     Null deviance: 148.33  on 525  degrees of freedom
## Residual deviance: 103.79  on 522  degrees of freedom
## AIC: 649.06
## 
## Number of Fisher Scoring iterations: 2

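#Interpretation
# Population model: log(wage) = b0 + b1*educ + b2*exper + b3*expersq + u
# Fitted equation:  log(wage) = 0.128 + 0.09*educ + 0.041*exper - 0.0007136*expersq
# wage is in logs and educ is in levels, so a one-unit increase in educ (not a one-percent increase) is associated with about a 9.037% higher wage, holding exper fixed.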

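#Question ii).
# The square of exper (expersq) is statistically significant: its t value is -6.164 with a p-value of 1.42e-09, so it is significant even at the 1% level. The negative sign of the t value only reflects the negative coefficient; significance depends on the magnitude of t, not on its sign.
#Question iii).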

# Same wage regression as model5 but without the expersq term, so that
# log(wage) depends linearly on educ and exper only.
model6 <- glm(
  formula = log(wage) ~ educ + exper,
  data = data2
)
summary(model6)
## 
## Call:
## glm(formula = log(wage) ~ educ + exper, data = data2)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.05800  -0.30136  -0.04539   0.30601   1.44425  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.216854   0.108595   1.997   0.0464 *  
## educ        0.097936   0.007622  12.848  < 2e-16 ***
## exper       0.010347   0.001555   6.653 7.24e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.2128962)
## 
##     Null deviance: 148.33  on 525  degrees of freedom
## Residual deviance: 111.34  on 523  degrees of freedom
## AIC: 684.02
## 
## Number of Fisher Scoring iterations: 2