myData = data.frame(wage = c(12, 8, 16.26, 13.65, 8.5), educ = c(12, 12, 12,
16, 17), sex = c("M", "F", "M", "M", "M"), sexm = c(1, -1, 1, 1, 1), sexf = c(-1,
1, -1, -1, -1), status = c("Married", "Married", "Single", "Married", "Single"),
age = c(32, 33, 32, 33, 26), sector = c("manuf", "service", "service", "prof",
"clerical"))
myData
## wage educ sex sexm sexf status age sector
## 1 12.00 12 M 1 -1 Married 32 manuf
## 2 8.00 12 F -1 1 Married 33 service
## 3 16.26 12 M 1 -1 Single 32 service
## 4 13.65 16 M 1 -1 Married 33 prof
## 5 8.50 17 M 1 -1 Single 26 clerical
1. Compute the angle between the sexM and sexF vectors
angle1 <- sum(myData$sexm * myData$sexf)/(sqrt(sum(myData$sexm * myData$sexm)) *
sqrt(sum(myData$sexf * myData$sexf)))
acos(angle1) * 180/pi
## [1] 180
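For reuse across questions, the same computation can be wrapped in a small helper; vec_angle is a hypothetical name, not a base R function.

# Hypothetical helper: angle in degrees between two vectors,
# i.e., the arc-cosine of their cosine similarity.
vec_angle <- function(u, v) {
    acos(sum(u * v)/(sqrt(sum(u * u)) * sqrt(sum(v * v)))) * 180/pi
}
vec_angle(myData$sexm, myData$sexf)  # 180: the vectors point in exactly opposite directions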
2. Compute the correlation between the sexM and sexF vectors
cor(myData$sexm, myData$sexf)
## [1] -1
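The two results are connected: the correlation is the cosine of the angle between the mean-centered vectors. A minimal check:

# cor(u, v) equals the cosine of the angle between centered u and v.
u <- myData$sexm - mean(myData$sexm)
v <- myData$sexf - mean(myData$sexf)
sum(u * v)/(sqrt(sum(u * u)) * sqrt(sum(v * v)))  # -1, matching cor() above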
3. Fit the linear model: wage ~ sexM + sexF - 1, and write the equation for the model.
lm1 <- lm(wage ~ sexm + sexf - 1, data = myData)
summary(lm1)
##
## Call:
## lm(formula = wage ~ sexm + sexf - 1, data = myData)
##
## Residuals:
## 1 2 3 4 5
## 3.518 16.482 7.778 5.168 0.018
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## sexm 8.48 4.31 1.97 0.12
## sexf NA NA NA NA
##
## Residual standard error: 9.63 on 4 degrees of freedom
## Multiple R-squared: 0.492, Adjusted R-squared: 0.365
## F-statistic: 3.88 on 1 and 4 DF, p-value: 0.12
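From the summary, the fitted equation is wage-hat = 8.48(sexM); the sexF term is dropped (coefficient NA), as examined in Question 4, and the model matrix below shows the two coded columns.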
model.matrix(lm1)
## sexm sexf
## 1 1 -1
## 2 -1 1
## 3 1 -1
## 4 1 -1
## 5 1 -1
## attr(,"assign")
## [1] 1 2
4. Explain using vector geometry why NA is produced for one of the coefficients. (Hint: think subspace)
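The redundancy is easy to confirm numerically: sexF is exactly -1 times sexM, so the two columns span the same one-dimensional subspace, the design matrix has rank 1, and lm() drops the redundant column and reports NA for its coefficient.

all(myData$sexf == -myData$sexm)  # TRUE: sexf = -sexm
qr(model.matrix(lm1))$rank  # 1: two columns, but only one dimension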
5. Fit the linear model: wage ~ 1 + sexF. Write out the b vector.
lm2 <- lm(wage ~ 1 + sexf, data = myData)
summary(lm2)
##
## Call:
## lm(formula = wage ~ 1 + sexf, data = myData)
##
## Residuals:
## 1 2 3 4 5
## -6.02e-01 -1.11e-16 3.66e+00 1.05e+00 -4.10e+00
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.30 1.82 5.67 0.011 *
## sexf -2.30 1.82 -1.27 0.295
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.25 on 3 degrees of freedom
## Multiple R-squared: 0.349, Adjusted R-squared: 0.131
## F-statistic: 1.61 on 1 and 3 DF, p-value: 0.295
b.vec1 <- c(10.3, -2.3)
b.vec1
## [1] 10.3 -2.3
6. Interpret the intercept coefficient.
7. Interpret the slope coefficient.
8. Compute the fitted values for male and female.
preds1 <- predict(lm2, newdata = myData)
preds1
## 1 2 3 4 5
## 12.6 8.0 12.6 12.6 12.6
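Equivalently, the fitted values follow directly from b, since female is coded 1 and male is coded -1:

# Fitted values computed from the coefficients:
coef(lm2)[1] + coef(lm2)[2] * -1  # male: 10.3 + 2.3 = 12.6
coef(lm2)[1] + coef(lm2)[2] * 1  # female: 10.3 - 2.3 = 8.0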
9. Fit the linear model: wage ~ 1 + sexM. Write out the b vector.
lm3 <- lm(wage ~ 1 + sexm, data = myData)
summary(lm3)
##
## Call:
## lm(formula = wage ~ 1 + sexm, data = myData)
##
## Residuals:
## 1 2 3 4 5
## -6.02e-01 -1.11e-16 3.66e+00 1.05e+00 -4.10e+00
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.30 1.82 5.67 0.011 *
## sexm 2.30 1.82 1.27 0.295
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.25 on 3 degrees of freedom
## Multiple R-squared: 0.349, Adjusted R-squared: 0.131
## F-statistic: 1.61 on 1 and 3 DF, p-value: 0.295
b.vec2 <- c(10.3, 2.3)
b.vec2
## [1] 10.3 2.3
10. Interpret the intercept coefficient.
11. Interpret the slope coefficient.
12. Compute the fitted values for male and female.
preds2 <- predict(lm3, newdata = myData)
preds2
## 1 2 3 4 5
## 12.6 8.0 12.6 12.6 12.6
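Reversing the coding flips the sign of the slope but leaves the fits unchanged:

# Same fitted values as lm2; only the slope's sign depends on the coding.
coef(lm3)[1] + coef(lm3)[2] * 1  # male: 10.3 + 2.3 = 12.6
coef(lm3)[1] + coef(lm3)[2] * -1  # female: 10.3 - 2.3 = 8.0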
13. Compute the marginal (grand) mean for wages.
wage.gm <- mean(myData$wage)
wage.gm
## [1] 11.68
14. Compute the mean wage conditioned on sex.
male <- subset(myData, sex == "M")
female <- subset(myData, sex == "F")
mean.male <- mean(male$wage)  # conditional mean for males: 12.60
mean.female <- mean(female$wage)  # conditional mean for females: 8.00
# Unweighted average of the two conditional means:
wage.sex <- (mean.male + mean.female)/2
wage.sex
## [1] 10.3
15. How do the marginal and conditional means relate to the linear models fitted thus far? Explain.
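The connection can be checked directly: the intercept of lm2 (and lm3) is the unweighted average of the two conditional means, the slope is half their difference, and because the groups are unbalanced (4 males, 1 female) that average does not equal the grand mean.

tapply(myData$wage, myData$sex, mean)  # conditional means: F = 8.00, M = 12.60
(mean.male + mean.female)/2  # 10.3 = intercept of lm2 and lm3
(mean.male - mean.female)/2  # 2.3 = slope of lm3 (and -1 times the slope of lm2)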
Create another effects-coded vector for the sex variable (sexF2) by weighting the sexF variable by the inverse of the conditional sample sizes, so that female is coded 1 and each male is coded -¼.
16. Fit the linear model: wage ~ 1 + sexF2. Write out the b vector.
myData$sexf2 <- c(-0.25, 1, -0.25, -0.25, -0.25)
myData
## wage educ sex sexm sexf status age sector sexf2
## 1 12.00 12 M 1 -1 Married 32 manuf -0.25
## 2 8.00 12 F -1 1 Married 33 service 1.00
## 3 16.26 12 M 1 -1 Single 32 service -0.25
## 4 13.65 16 M 1 -1 Married 33 prof -0.25
## 5 8.50 17 M 1 -1 Single 26 clerical -0.25
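The coding can also be built programmatically, so it stays correct if the data change; a sketch, deriving the group sizes from the sex column itself:

# Weight by the inverse of each group's sample size:
# 1/n_F (positive) for female, 1/n_M (negative) for male.
myData$sexf2 <- ifelse(myData$sex == "F", 1/sum(myData$sex == "F"), -1/sum(myData$sex ==
    "M"))
myData$sexf2  # 1 for the female, -0.25 for each of the four males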
lm4 <- lm(wage ~ 1 + sexf2, data = myData)
summary(lm4)
##
## Call:
## lm(formula = wage ~ 1 + sexf2, data = myData)
##
## Residuals:
## 1 2 3 4 5
## -0.6025 0.0000 3.6575 1.0475 -4.1025
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.68 1.45 8.04 0.0040 **
## sexf2 -3.68 2.91 -1.27 0.2947
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.25 on 3 degrees of freedom
## Multiple R-squared: 0.349, Adjusted R-squared: 0.131
## F-statistic: 1.61 on 1 and 3 DF, p-value: 0.295
b.vec3 <- c(11.68, -3.68)
b.vec3
## [1] 11.68 -3.68
17. Interpret the intercept coefficient.
18. Interpret the slope coefficient.
19. Compute the fitted values for male and female.
preds3 <- predict(lm4, newdata = myData)
preds3
## 1 2 3 4 5
## 12.6 8.0 12.6 12.6 12.6
20. How do the marginal and conditional means relate to this linear model? Explain.
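With this coding the sexf2 column sums to zero, so the intercept equals the grand (marginal) mean and the fitted values recover the conditional means; a quick check:

sum(myData$sexf2)  # 0: the weighting balances the groups
coef(lm4)[1]  # 11.68, the grand (marginal) mean of wage
unique(round(predict(lm4), 2))  # 12.60 and 8.00, the conditional means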
Consider a regression through the origin carried out on four observations (Xᵢ, Yᵢ), where i = 1, …, 4.
21. Write out the X and Y matrices.
myData2 = data.frame(x = c(-5, 0, 3, 5), y = c(0, 7, 14, 21))
myData2
## x y
## 1 -5 0
## 2 0 7
## 3 3 14
## 4 5 21
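In matrix form, and noting that the model actually fitted below includes an intercept column, X and Y can be written out as follows (equivalent to model.matrix(lm.5) as used later):

# Design matrix X (column of ones plus x) and response vector Y:
X <- cbind(1, myData2$x)
Y <- matrix(myData2$y, ncol = 1)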
22. Write out the b vector
lm.5 <- lm(y ~ x, data = myData2)
summary(lm.5)
##
## Call:
## lm(formula = y ~ x, data = myData2)
##
## Residuals:
## 1 2 3 4
## 1.20 -1.97 -1.08 1.85
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.974 1.136 7.90 0.016 *
## x 2.035 0.296 6.88 0.020 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.23 on 2 degrees of freedom
## Multiple R-squared: 0.959, Adjusted R-squared: 0.939
## F-statistic: 47.3 on 1 and 2 DF, p-value: 0.0205
b.vec4 <- c(8.974, 2.035)
b.vec4
## [1] 8.974 2.035
23. Express the elements from b as linear combinations of X and Y.
Y = myData2$y
X = model.matrix(lm.5)
# Hat matrix: H = X (X'X)^{-1} X', the projection onto the column space of X.
h = X %*% solve(t(X) %*% X) %*% t(X)
h
## 1 2 3 4
## 1 0.83260 0.3260 0.02203 -0.1806
## 2 0.32599 0.2599 0.22026 0.1938
## 3 0.02203 0.2203 0.33921 0.4185
## 4 -0.18062 0.1938 0.41850 0.5683
lincomb <- h %*% Y  # fitted values: y-hat = H %*% Y
lincomb
## [,1]
## 1 -1.203
## 2 8.974
## 3 15.079
## 4 19.150
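Note that h %*% Y gives the fitted values; the elements of b are themselves linear combinations of Y, with weights given by the rows of (X'X)^{-1} X':

# b = (X'X)^{-1} X' Y: each coefficient is a weighted sum of the responses.
b <- solve(t(X) %*% X) %*% t(X) %*% Y
b  # 8.974 (intercept) and 2.035 (slope), matching coef(lm.5)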
24. Write the expectation vector for ε.
i <- diag(4)
# The observed residuals e = (I - H) %*% Y estimate epsilon; under the model
# assumptions, the expectation vector E(epsilon) is the zero vector.
e.vector <- (i - h) %*% Y
e.vector
## [,1]
## 1 1.203
## 2 -1.974
## 3 -1.079
## 4 1.850
25. Obtain an expression for the variance–covariance matrix of the fitted values, Ŷᵢ (where i = 1, …, n), in terms of the hat matrix H.
anova(lm.5)
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## x 1 235.1 235 47.4 0.02 *
## Residuals 2 9.9 5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mse <- 4.965
# Expression: var-cov(Y-hat) = sigma^2 * H, estimated here by MSE * H
vcv <- mse * h
vcv
## 1 2 3 4
## 1 4.1339 1.6185 0.1094 -0.8968
## 2 1.6185 1.2905 1.0936 0.9624
## 3 0.1094 1.0936 1.6842 2.0779
## 4 -0.8968 0.9624 2.0779 2.8215
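As a check, the diagonal of MSE * H should match the squared standard errors of the fitted values reported by predict():

predict(lm.5, se.fit = TRUE)$se.fit^2  # matches diag(vcv) up to the rounding of mse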