# Wage regressions on the wages1 data set (columns: exper, male, school, wage).
#
# Loads the Stata file, explores it, fits three linear models for wage and
# runs collinearity, heteroskedasticity and residual-normality diagnostics on
# the full model. Lines starting with "##" are output recorded from an earlier
# run of the script.

# Packages ----
# Loaded from the default library paths rather than a version-specific,
# machine-specific lib.loc, so the script also runs on other installations.
library(readstata13)  # read.dta13()
library(car)          # vif(), ncvTest(), spreadLevelPlot()
## Warning: package 'car' was built under R version 3.2.3
library(MASS)         # studres()
## Warning: package 'MASS' was built under R version 3.2.3

# Data ----
# Directory holding wages1.dta. Set the WAGES_DATA_DIR environment variable to
# point elsewhere; the working directory is left untouched.
data_dir <- Sys.getenv(
  "WAGES_DATA_DIR",
  unset = "C:/Users/marcogeovanni/OneDrive/Documentos/Chapter 2"
)
mm <- read.dta13(file.path(data_dir, "wages1.dta"))

summary(mm)
##      exper             male            school           wage         
##  Min.   : 1.000   Min.   :0.0000   Min.   : 3.00   Min.   : 0.07656  
##  1st Qu.: 7.000   1st Qu.:0.0000   1st Qu.:11.00   1st Qu.: 3.62157  
##  Median : 8.000   Median :1.0000   Median :12.00   Median : 5.20578  
##  Mean   : 8.043   Mean   :0.5237   Mean   :11.63   Mean   : 5.75759  
##  3rd Qu.: 9.000   3rd Qu.:1.0000   3rd Qu.:12.00   3rd Qu.: 7.30451  
##  Max.   :18.000   Max.   :1.0000   Max.   :16.00   Max.   :39.80892
str(mm)
## 'data.frame':    3294 obs. of  4 variables:
##  $ exper : num  9 12 11 9 8 9 8 10 12 7 ...
##  $ male  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ school: num  13 12 11 14 14 14 12 12 10 12 ...
##  $ wage  : num  6.32 5.48 3.64 4.59 2.42 ...
##  - attr(*, "datalabel")= chr ""
##  - attr(*, "time.stamp")= chr "13 Jun 2004 11:56"
##  - attr(*, "formats")= chr  "%9.0g" "%9.0g" "%9.0g" "%9.0g"
##  - attr(*, "types")= int  254 254 254 254
##  - attr(*, "val.labels")= chr  "" "" "" ""
##  - attr(*, "var.labels")= chr  "" "" "" ""
##  - attr(*, "version")= int 110
##  - attr(*, "label.table")= list()
##  - attr(*, "expansion.fields")= list()
##  - attr(*, "byteorder")= int 2

# Univariate exploration ----
# with() evaluates each call inside `mm` without attach()ing the data frame to
# the search path, and keeps the bare column names as the default plot labels.
with(mm, hist(wage, col = "blue"))

with(mm, hist(male, col = "yellow"))

# Naming the argument keeps the "male" header in the printed table.
table(male = mm$male)
## male
##    0    1 
## 1569 1725

# Simple regression: wage on the male dummy ----
with(mm, plot(male, wage))
cc <- lm(wage ~ male, data = mm)
summary(cc)
## 
## Call:
## lm(formula = wage ~ male, data = mm)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.160 -2.102 -0.554  1.487 33.496 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.14692    0.08122   63.37   <2e-16 ***
## male         1.16610    0.11224   10.39   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.217 on 3292 degrees of freedom
## Multiple R-squared:  0.03175,    Adjusted R-squared:  0.03145 
## F-statistic: 107.9 on 1 and 3292 DF,  p-value: < 2.2e-16
abline(cc, col = "red")  # fitted line on the male/wage scatter plot
confint(cc)
##                 2.5 %   97.5 %
## (Intercept) 4.9876676 5.306180
## male        0.9460258 1.386169

# Same regressor without an intercept ("- 1"). The single coefficient (6.3130)
# equals intercept + slope of `cc`, i.e. the fitted wage for male == 1.
# R-squared here is measured against zero instead of the mean of wage, so it
# is not comparable with the R-squared of `cc`.
cc2 <- lm(wage ~ male - 1, data = mm)
summary(cc2)
## 
## Call:
## lm(formula = wage ~ male - 1, data = mm)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.160 -0.801  2.567  5.074 33.496 
## 
## Coefficients:
##      Estimate Std. Error t value Pr(>|t|)    
## male   6.3130     0.1154   54.71   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.793 on 3293 degrees of freedom
## Multiple R-squared:  0.4761, Adjusted R-squared:  0.476 
## F-statistic:  2993 on 1 and 3293 DF,  p-value: < 2.2e-16
# With a one-coefficient fit, abline() draws a line through the origin.
abline(cc2, col = "orange")

# Multiple regression: wage on male, school and exper ----
cor(mm)
##              exper       male     school       wage
## exper   1.00000000  0.1295449 -0.1935653 0.05138051
## male    0.12954487  1.0000000 -0.1190839 0.17817368
## school -0.19356533 -0.1190839  1.0000000 0.28249249
## wage    0.05138051  0.1781737  0.2824925 1.00000000
pairs(mm, col = "red")
cc3 <- lm(wage ~ male + school + exper, data = mm)
summary(cc3)
## 
## Call:
## lm(formula = wage ~ male + school + exper, data = mm)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -7.654 -1.967 -0.457  1.444 34.194 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3.38002    0.46498  -7.269 4.50e-13 ***
## male         1.34437    0.10768  12.485  < 2e-16 ***
## school       0.63880    0.03280  19.478  < 2e-16 ***
## exper        0.12483    0.02376   5.253 1.59e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.046 on 3290 degrees of freedom
## Multiple R-squared:  0.1326, Adjusted R-squared:  0.1318 
## F-statistic: 167.6 on 3 and 3290 DF,  p-value: < 2.2e-16

# Diagnostics for the full model ----
# Variance inflation factors (collinearity among the regressors).
vif(cc3)
##     male   school    exper 
## 1.026656 1.048719 1.051493
# Score test for non-constant error variance against the fitted values.
ncvTest(cc3)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 112.5283    Df = 1     p = 2.737333e-26
# Spread-level plot; also prints a suggested power transformation.
spreadLevelPlot(cc3)

## 
## Suggested power transformation:  0.3011862

# Residual normality check ----
# Density histogram of the studentized residuals with the standard normal
# density drawn on top (the plot title means "Distribution of the error").
NNorm <- studres(cc3)
hist(NNorm, freq = FALSE, main = "Distribución del error", col = "blue")
xa <- seq(min(NNorm), max(NNorm), length.out = 40)
ya <- dnorm(xa)
lines(xa, ya)