# Attach dependencies with library() so a missing package stops the script
# right here; require() only warns and returns FALSE, which would surface
# later as a confusing "object 'uswages' not found" error.
library("faraway")
library("ggplot2")

# Load the uswages data set, naming the package it is expected to come from.
data("uswages", package = "faraway")

# Work on a copy so the original uswages data frame stays untouched.
uswages2 <- uswages

# Columns to recode as factors, so that summary() tabulates level counts
# and model formulas treat them as categorical rather than numeric.
indicator_cols <- c("race", "smsa", "ne", "mw", "so", "we", "pt")
uswages2[indicator_cols] <- lapply(uswages2[indicator_cols], factor)

# Print a per-column summary of uswages2: quantiles and mean for the numeric
# columns, level counts for the columns that were converted to factors.
# NOTE(review): the recorded output below shows a minimum exper of -2; a
# negative value looks like a data artifact -- confirm whether such rows
# should be set to NA or dropped before fitting the models.
summary(uswages2)
##       wage              educ           exper       race     smsa    
##  Min.   :  50.39   Min.   : 0.00   Min.   :-2.00   0:1844   0: 488  
##  1st Qu.: 308.64   1st Qu.:12.00   1st Qu.: 8.00   1: 156   1:1512  
##  Median : 522.32   Median :12.00   Median :15.00                    
##  Mean   : 608.12   Mean   :13.11   Mean   :18.41                    
##  3rd Qu.: 783.48   3rd Qu.:16.00   3rd Qu.:27.00                    
##  Max.   :7716.05   Max.   :18.00   Max.   :59.00                    
##  ne       mw       so       we       pt      
##  0:1542   0:1503   0:1375   0:1580   0:1815  
##  1: 458   1: 497   1: 625   1: 420   1: 185  
##                                              
##                                              
##                                              
## 
# Model 1: least-squares fit of wage (untransformed) on educ and exper,
# followed by the coefficient table and fit statistics.
l_mod <- lm(formula = wage ~ educ + exper, data = uswages2)
summary(l_mod)
## 
## Call:
## lm(formula = wage ~ educ + exper, data = uswages2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1018.2  -237.9   -50.9   149.9  7228.6 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -242.7994    50.6816  -4.791 1.78e-06 ***
## educ          51.1753     3.3419  15.313  < 2e-16 ***
## exper          9.7748     0.7506  13.023  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 427.9 on 1997 degrees of freedom
## Multiple R-squared:  0.1351, Adjusted R-squared:  0.1343 
## F-statistic:   156 on 2 and 1997 DF,  p-value: < 2.2e-16
# Model 2: same predictors as model 1, but the response is log(wage), so the
# coefficients are additive effects on the natural-log scale of wage.
l_mod_log <- lm(formula = log(wage) ~ educ + exper, data = uswages2)
summary(l_mod_log)
## 
## Call:
## lm(formula = log(wage) ~ educ + exper, data = uswages2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7533 -0.3495  0.1068  0.4381  3.5699 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 4.650319   0.078354   59.35   <2e-16 ***
## educ        0.090506   0.005167   17.52   <2e-16 ***
## exper       0.018079   0.001160   15.58   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6615 on 1997 degrees of freedom
## Multiple R-squared:  0.1749, Adjusted R-squared:  0.174 
## F-statistic: 211.6 on 2 and 1997 DF,  p-value: < 2.2e-16
# Show the first six wages on the original scale ...
head(uswages2[["wage"]])
## [1] 771.60 617.28 957.83 617.28 902.18 299.15
# ... and the same six observations on the natural-log scale used as the
# response in model 2.
log(head(uswages2[["wage"]]))
## [1] 6.648466 6.425323 6.864670 6.425323 6.804814 5.700945