From Lesson 3 - Multiple Regression

WAGES <- readXL("WAGES.xls")
WAGES
str(WAGES)
'data.frame':   1289 obs. of  7 variables:
 $ obs      : num  1 2 3 4 5 6 7 8 9 10 ...
 $ wage     : num  11.6 5 12 7 21.1 ...
 $ female   : num  1 0 0 0 1 1 1 1 0 1 ...
 $ nonwhite : num  0 0 0 1 1 0 0 1 0 0 ...
 $ union    : num  0 0 0 1 0 0 0 0 0 0 ...
 $ education: num  12 9 16 14 16 12 12 12 18 18 ...
 $ exper    : num  20 9 15 38 19 4 14 32 7 5 ...
summary(WAGES)
      obs            wage           female          nonwhite          union         education         exper      
 Min.   :   1   Min.   : 0.84   Min.   :0.0000   Min.   :0.0000   Min.   :0.000   Min.   : 0.00   Min.   : 0.00  
 1st Qu.: 323   1st Qu.: 6.92   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:12.00   1st Qu.: 9.00  
 Median : 645   Median :10.08   Median :0.0000   Median :0.0000   Median :0.000   Median :12.00   Median :18.00  
 Mean   : 645   Mean   :12.37   Mean   :0.4973   Mean   :0.1528   Mean   :0.159   Mean   :13.15   Mean   :18.79  
 3rd Qu.: 967   3rd Qu.:15.63   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.000   3rd Qu.:16.00   3rd Qu.:27.00  
 Max.   :1289   Max.   :64.08   Max.   :1.0000   Max.   :1.0000   Max.   :1.000   Max.   :20.00   Max.   :56.00  
cor(WAGES)
                   obs        wage       female    nonwhite        union    education        exper
obs        1.000000000 -0.05843877  0.006296486 -0.02566251 -0.002907478  0.003806248 -0.006655294
wage      -0.058438769  1.00000000 -0.223301829 -0.12783381  0.102246655  0.456517979  0.173173303
female     0.006296486 -0.22330183  1.000000000  0.04327185 -0.088856935 -0.031439159 -0.022656813
nonwhite  -0.025662507 -0.12783381  0.043271852  1.00000000  0.080587911 -0.087061729 -0.039129103
union     -0.002907478  0.10224666 -0.088856935  0.08058791  1.000000000  0.003966952  0.154319024
education  0.003806248  0.45651798 -0.031439159 -0.08706173  0.003966952  1.000000000 -0.180103012
exper     -0.006655294  0.17317330 -0.022656813 -0.03912910  0.154319024 -0.180103012  1.000000000
WAGES.lm <- lm(wage~.,data=WAGES)
summary(WAGES.lm)

Call:
lm(formula = wage ~ ., data = WAGES)

Residuals:
    Min      1Q  Median      3Q     Max 
-20.622  -3.668  -1.001   2.609  50.493 

Coefficients:
              Estimate Std. Error t value      Pr(>|t|)    
(Intercept) -6.3673562  1.0617226  -5.997 0.00000000261 ***
obs         -0.0012550  0.0004863  -2.581       0.00997 ** 
female      -3.0679942  0.3638243  -8.433       < 2e-16 ***
nonwhite    -1.5994861  0.5082405  -3.147       0.00169 ** 
union        1.0973070  0.5049656   2.173       0.02996 *  
education    1.3703624  0.0657593  20.839       < 2e-16 ***
exper        0.1663016  0.0160127  10.386       < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6.494 on 1282 degrees of freedom
Multiple R-squared:  0.3268,    Adjusted R-squared:  0.3237 
F-statistic: 103.7 on 6 and 1282 DF,  p-value: < 2.2e-16
WAGES.lm2 <- lm(wage~.-obs,data=WAGES)
summary(WAGES.lm2)

Call:
lm(formula = wage ~ . - obs, data = WAGES)

Residuals:
    Min      1Q  Median      3Q     Max 
-20.781  -3.760  -1.044   2.418  50.414 

Coefficients:
            Estimate Std. Error t value         Pr(>|t|)    
(Intercept) -7.18334    1.01579  -7.072 0.00000000000251 ***
female      -3.07488    0.36462  -8.433          < 2e-16 ***
nonwhite    -1.56531    0.50919  -3.074          0.00216 ** 
union        1.09598    0.50608   2.166          0.03052 *  
education    1.37030    0.06590  20.792          < 2e-16 ***
exper        0.16661    0.01605  10.382          < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6.508 on 1283 degrees of freedom
Multiple R-squared:  0.3233,    Adjusted R-squared:  0.3207 
F-statistic: 122.6 on 5 and 1283 DF,  p-value: < 2.2e-16

\[ wage = -7.18 - 3.07female - 1.57nonwhite + 1.10union + 1.37education + 0.17exper + \epsilon \]

library(carData)
Prestige
PRESTIGE <- Prestige
PRESTIGE.lm <- lm(income~.,data=PRESTIGE)
summary(PRESTIGE.lm)

Call:
lm(formula = income ~ ., data = PRESTIGE)

Residuals:
    Min      1Q  Median      3Q     Max 
-7752.4  -954.6  -331.2   742.6 14301.3 

Coefficients:
              Estimate Std. Error t value    Pr(>|t|)    
(Intercept)    7.32053 3037.27048   0.002     0.99808    
education    131.18372  288.74961   0.454     0.65068    
women        -53.23480    9.83107  -5.415 0.000000496 ***
prestige     139.20912   36.40239   3.824     0.00024 ***
census         0.04209    0.23568   0.179     0.85865    
typeprof     509.15150 1798.87914   0.283     0.77779    
typewc       347.99010 1173.89384   0.296     0.76757    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 2633 on 91 degrees of freedom
  (4 observations deleted due to missingness)
Multiple R-squared:  0.6363,    Adjusted R-squared:  0.6123 
F-statistic: 26.54 on 6 and 91 DF,  p-value: < 2.2e-16

\[ income = 7.32 + 131.18education - 53.23women + 139.21prestige + 0.04census + 509.15typeprof + 347.99typewc + \epsilon \]

From Lesson 4 - Regression Diagnostics

source("scripts/r4abep-01.R")
library(RcmdrMisc)
options(scipen = 10)
library(esquisse)
WOMEN <- women
WOMEN
str(WOMEN)
'data.frame':   15 obs. of  2 variables:
 $ height: num  58 59 60 61 62 63 64 65 66 67 ...
 $ weight: num  115 117 120 123 126 129 132 135 139 142 ...
WOMEN.lm <- lm(height~weight,data=WOMEN)
summary(WOMEN.lm)

Call:
lm(formula = height ~ weight, data = WOMEN)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.83233 -0.26249  0.08314  0.34353  0.49790 

Coefficients:
             Estimate Std. Error t value           Pr(>|t|)    
(Intercept) 25.723456   1.043746   24.64 0.0000000000026848 ***
weight       0.287249   0.007588   37.85 0.0000000000000109 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.44 on 13 degrees of freedom
Multiple R-squared:  0.991, Adjusted R-squared:  0.9903 
F-statistic:  1433 on 1 and 13 DF,  p-value: 0.00000000000001091
r4abep.plotlm(WOMEN.lm)

WOMEN.lm2 <- lm(height~weight+I(weight^2),data=WOMEN)
summary(WOMEN.lm2)

Call:
lm(formula = height ~ weight + I(weight^2), data = WOMEN)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.105338 -0.035764 -0.004898  0.049430  0.141593 

Coefficients:
                Estimate   Std. Error t value          Pr(>|t|)    
(Intercept) -11.74693860   1.71998084   -6.83 0.000018241147800 ***
weight        0.83434066   0.02502062   33.35 0.000000000000336 ***
I(weight^2)  -0.00197330   0.00009014  -21.89 0.000000000048424 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.07158 on 12 degrees of freedom
Multiple R-squared:  0.9998,    Adjusted R-squared:  0.9997 
F-statistic: 2.732e+04 on 2 and 12 DF,  p-value: < 2.2e-16
r4abep.plotlm(WOMEN.lm2)

WAGES01 <- readXL("WAGES01.xls")
WAGES01
str(WAGES01)
'data.frame':   1289 obs. of  8 variables:
 $ obs      : num  1 2 3 4 5 6 7 8 9 10 ...
 $ wage     : num  11.6 5 12 7 21.1 ...
 $ female   : num  1 0 0 0 1 1 1 1 0 1 ...
 $ nonwhite : num  0 0 0 1 1 0 0 1 0 0 ...
 $ union    : num  0 0 0 1 0 0 0 0 0 0 ...
 $ education: num  12 9 16 14 16 12 12 12 18 18 ...
 $ exper    : num  20 9 15 38 19 4 14 32 7 5 ...
 $ age      : num  38 24 37 58 41 22 32 50 31 29 ...
WAGES01.lm <- lm(wage~.,data=WAGES01)
summary(WAGES01.lm)

Call:
lm(formula = wage ~ ., data = WAGES01)

Residuals:
    Min      1Q  Median      3Q     Max 
-20.622  -3.668  -1.001   2.609  50.493 

Coefficients: (1 not defined because of singularities)
              Estimate Std. Error t value      Pr(>|t|)    
(Intercept) -6.3673562  1.0617226  -5.997 0.00000000261 ***
obs         -0.0012550  0.0004863  -2.581       0.00997 ** 
female      -3.0679942  0.3638243  -8.433       < 2e-16 ***
nonwhite    -1.5994861  0.5082405  -3.147       0.00169 ** 
union        1.0973070  0.5049656   2.173       0.02996 *  
education    1.3703624  0.0657593  20.839       < 2e-16 ***
exper        0.1663016  0.0160127  10.386       < 2e-16 ***
age                 NA         NA      NA            NA    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6.494 on 1282 degrees of freedom
Multiple R-squared:  0.3268,    Adjusted R-squared:  0.3237 
F-statistic: 103.7 on 6 and 1282 DF,  p-value: < 2.2e-16
r4abep.cor(WAGES01)

r4abep.corgram(WAGES01)

WAGES01.lm2 <- lm(wage~.-exper,data=WAGES01)
summary(WAGES01.lm2)

Call:
lm(formula = wage ~ . - exper, data = WAGES01)

Residuals:
    Min      1Q  Median      3Q     Max 
-20.622  -3.668  -1.001   2.609  50.493 

Coefficients:
              Estimate Std. Error t value       Pr(>|t|)    
(Intercept) -7.3651657  1.1068167  -6.654 0.000000000042 ***
obs         -0.0012550  0.0004863  -2.581        0.00997 ** 
female      -3.0679942  0.3638243  -8.433        < 2e-16 ***
nonwhite    -1.5994861  0.5082405  -3.147        0.00169 ** 
union        1.0973070  0.5049656   2.173        0.02996 *  
education    1.2040609  0.0646781  18.616        < 2e-16 ***
age          0.1663016  0.0160127  10.386        < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6.494 on 1282 degrees of freedom
Multiple R-squared:  0.3268,    Adjusted R-squared:  0.3237 
F-statistic: 103.7 on 6 and 1282 DF,  p-value: < 2.2e-16
WWORK <- readXL("WWORK.xls")
WWORK