# Load the Stata 13 reader from the default library search path
# (.libPaths()). A hard-coded `lib.loc` pins the script to one user's
# R 3.2 Windows library and breaks on any other machine or R version.
library("readstata13")

# Read the data through an explicit path instead of setwd(), so running the
# script does not change the session's working directory.
data_dir <- "C:/Users/marcogeovanni/OneDrive/Documentos/Chapter 2"
mm <- read.dta13(file.path(data_dir, "wages1.dta"))

# NOTE(review): attach() puts the columns of `mm` on the search path so that
# bare names such as `wage` resolve; any global object with the same name
# silently masks them. Prefer `data = mm` / `with(mm, ...)` and remove this
# call once no statement depends on it.
attach(mm)

# Descriptive statistics for every column.
summary(mm)
## exper male school wage
## Min. : 1.000 Min. :0.0000 Min. : 3.00 Min. : 0.07656
## 1st Qu.: 7.000 1st Qu.:0.0000 1st Qu.:11.00 1st Qu.: 3.62157
## Median : 8.000 Median :1.0000 Median :12.00 Median : 5.20578
## Mean : 8.043 Mean :0.5237 Mean :11.63 Mean : 5.75759
## 3rd Qu.: 9.000 3rd Qu.:1.0000 3rd Qu.:12.00 3rd Qu.: 7.30451
## Max. :18.000 Max. :1.0000 Max. :16.00 Max. :39.80892
# Column types plus the attributes carried over from the Stata file.
str(mm)
## 'data.frame': 3294 obs. of 4 variables:
## $ exper : num 9 12 11 9 8 9 8 10 12 7 ...
## $ male : num 0 0 0 0 0 0 0 0 0 0 ...
## $ school: num 13 12 11 14 14 14 12 12 10 12 ...
## $ wage : num 6.32 5.48 3.64 4.59 2.42 ...
## - attr(*, "datalabel")= chr ""
## - attr(*, "time.stamp")= chr "13 Jun 2004 11:56"
## - attr(*, "formats")= chr "%9.0g" "%9.0g" "%9.0g" "%9.0g"
## - attr(*, "types")= int 254 254 254 254
## - attr(*, "val.labels")= chr "" "" "" ""
## - attr(*, "var.labels")= chr "" "" "" ""
## - attr(*, "version")= int 110
## - attr(*, "label.table")= list()
## - attr(*, "expansion.fields")= list()
## - attr(*, "byteorder")= int 2
# Univariate views. with(mm, ...) looks the columns up inside the data frame
# itself (the default titles and axis labels stay the same) instead of
# depending on whatever `wage` / `male` happen to be on the search path.
with(mm, hist(wage, col = "blue"))

with(mm, hist(male, col = "yellow"))

# Group sizes of the 0/1 `male` indicator.
with(mm, table(male))
## male
## 0 1
## 1569 1725
# Scatter of wage against `male`; fitted lines are added to this plot by the
# abline() calls further down.
with(mm, plot(male, wage))
# Simple regression of wage on `male`. Passing `data = mm` makes the fit
# independent of attach() and of any same-named global variables.
cc <- lm(wage ~ male, data = mm)
summary(cc)
##
## Call:
## lm(formula = wage ~ male, data = mm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.160 -2.102 -0.554 1.487 33.496
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.14692 0.08122 63.37 <2e-16 ***
## male 1.16610 0.11224 10.39 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.217 on 3292 degrees of freedom
## Multiple R-squared: 0.03175, Adjusted R-squared: 0.03145
## F-statistic: 107.9 on 1 and 3292 DF, p-value: < 2.2e-16
# Overlay the fitted line of `cc` on the current scatter plot.
abline(cc, col = "red")
# Confidence intervals for the coefficients of `cc` (default level: the
# 2.5 % / 97.5 % bounds shown below).
confint(cc)
## 2.5 % 97.5 %
## (Intercept) 4.9876676 5.306180
## male 0.9460258 1.386169
# Same regressor with the intercept removed (`- 1`). `data = mm` keeps the
# fit independent of attach() and of same-named global variables.
# NOTE(review): for a model without an intercept summary() reports an
# R-squared measured about zero rather than about the mean, so it should not
# be compared with the R-squared of `cc` -- confirm this is the intended
# illustration.
cc2 <- lm(wage ~ male - 1, data = mm)
summary(cc2)
##
## Call:
## lm(formula = wage ~ male - 1, data = mm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.160 -0.801 2.567 5.074 33.496
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## male 6.3130 0.1154 54.71 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.793 on 3293 degrees of freedom
## Multiple R-squared: 0.4761, Adjusted R-squared: 0.476
## F-statistic: 2993 on 1 and 3293 DF, p-value: < 2.2e-16
# Add the no-intercept fit to the scatter plot; with a single coefficient
# abline() draws it as a line through the origin.
abline(cc2, col = "orange")

# Pairwise correlations between all columns.
cor(mm)
## exper male school wage
## exper 1.00000000 0.1295449 -0.1935653 0.05138051
## male 0.12954487 1.0000000 -0.1190839 0.17817368
## school -0.19356533 -0.1190839 1.0000000 0.28249249
## wage 0.05138051 0.1781737 0.2824925 1.00000000
# Scatter-plot matrix of the same columns.
pairs(mm, col = "red")
# Multiple regression of wage on gender, schooling and experience.
# `data = mm` keeps the fit independent of attach() and of same-named global
# variables.
cc3 <- lm(wage ~ male + school + exper, data = mm)
summary(cc3)
##
## Call:
## lm(formula = wage ~ male + school + exper, data = mm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.654 -1.967 -0.457 1.444 34.194
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.38002 0.46498 -7.269 4.50e-13 ***
## male 1.34437 0.10768 12.485 < 2e-16 ***
## school 0.63880 0.03280 19.478 < 2e-16 ***
## exper 0.12483 0.02376 5.253 1.59e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.046 on 3290 degrees of freedom
## Multiple R-squared: 0.1326, Adjusted R-squared: 0.1318
## F-statistic: 167.6 on 3 and 3290 DF, p-value: < 2.2e-16
# car supplies vif(), ncvTest() and spreadLevelPlot(). It is loaded from the
# default library search path; a hard-coded `lib.loc` pins the script to one
# user's R 3.2 Windows library and breaks elsewhere.
library("car")
## Warning: package 'car' was built under R version 3.2.3

# Variance inflation factors of the regressors in `cc3` (multicollinearity
# check).
vif(cc3)
## male school exper
## 1.026656 1.048719 1.051493
# Score test for non-constant error variance, using the fitted values as the
# variance formula (see the recorded output below).
ncvTest(cc3)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 112.5283 Df = 1 p = 2.737333e-26
# Spread-level plot for `cc3`; the call also prints a suggested power
# transformation.
spreadLevelPlot(cc3)

##
## Suggested power transformation: 0.3011862
# MASS supplies studres(). It is loaded from the default library search path
# rather than a user-specific `lib.loc`, which breaks on other machines.
library("MASS")
## Warning: package 'MASS' was built under R version 3.2.3
# Studentized residuals of the full model `cc3`.
NNorm <- studres(cc3)
# Histogram on the density scale so a density curve can be drawn over it.
hist(NNorm, freq = FALSE, main = "Distribución del error", col = "blue")
# Standard normal density on a 40-point grid spanning the residual range.
# `length.out` is spelled out in full; `length=` only worked through partial
# argument matching.
xa <- seq(min(NNorm), max(NNorm), length.out = 40)
ya <- dnorm(xa)
lines(xa, ya)
