Straight Line Regression Based on Weighted Least Squares

CHAPTER 4 OUTPUT

This shows the outputs from Chapter 4 using R. The textbook is “A Modern Approach to Regression with R” by Simon J. Sheather (2008). The R code provided with the book has been updated.

cleaningwtd <- read_excel("MARData.xlsx", sheet = "cleaningwtd")

attach(cleaningwtd)
head(cleaningwtd, 10)

## # A tibble: 10 x 4
##     Case Crews Rooms StdDev
##    <dbl> <dbl> <dbl>  <dbl>
##  1     1    16    51  12.0 
##  2     2    10    37   7.93
##  3     3    12    37   7.29
##  4     4    16    46  12.0 
##  5     5    16    45  12.0 
##  6     6     4    11   4.97
##  7     7     2     6   3   
##  8     8     4    19   4.97
##  9     9     6    29   4.69
## 10    10     2    14   3

#Regression output on page 117
wm1 <- lm(Rooms~Crews,weights  =  1/StdDev^2)
summary(wm1)

## 
## Call:
## lm(formula = Rooms ~ Crews, weights = 1/StdDev^2)
## 
## Weighted Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.43184 -0.82013  0.03909  0.69029  2.01030 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.8095     1.1158   0.725    0.471    
## Crews         3.8255     0.1788  21.400   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9648 on 51 degrees of freedom
## Multiple R-squared:  0.8998, Adjusted R-squared:  0.8978 
## F-statistic:   458 on 1 and 51 DF,  p-value: < 2.2e-16

predict(wm1,newdata = data.frame(Crews = c(4,16)),interval = "prediction",level  =  0.95)

## Warning in predict.lm(wm1, newdata = data.frame(Crews = c(4, 16)), interval = "prediction", : Assuming constant prediction variance even though model fit is weighted

##        fit      lwr      upr
## 1 16.11133 13.71210 18.51056
## 2 62.01687 57.38601 66.64773

#Regression output on page 120
ynew <- Rooms/StdDev
x1new <- 1/StdDev
x2new <- Crews/StdDev
wm1check <- lm(ynew~x1new + x2new - 1)
summary(wm1check)

## 
## Call:
## lm(formula = ynew ~ x1new + x2new - 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.43184 -0.82013  0.03909  0.69029  2.01030 
## 
## Coefficients:
##       Estimate Std. Error t value Pr(>|t|)    
## x1new   0.8095     1.1158   0.725    0.471    
## x2new   3.8255     0.1788  21.400   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9648 on 51 degrees of freedom
## Multiple R-squared:  0.9617, Adjusted R-squared:  0.9602 
## F-statistic: 639.6 on 2 and 51 DF,  p-value: < 2.2e-16

predict(wm1check,newdata  =  data.frame(x1new  =  c(1/4.966555,1/12.000463),x2new  =  c(4/4.966555,16/12.000463)),interval = "prediction",level  =  0.95)

##        fit      lwr      upr
## 1 3.243965 1.286166 5.201763
## 2 5.167873 3.199481 7.136265

detach(cleaningwtd)

#################EXERCISES

#Ex 4.2.3

Houston <- read_excel("MARData.xlsx", sheet  =  "HoustonRealEstate")
attach(Houston)

head(Houston, 10)

## # A tibble: 10 x 4
##       Yi    ni   x1i   x2i
##    <dbl> <dbl> <dbl> <dbl>
##  1 169.      7 0.857 0    
##  2  56.8     6 0.167 0.667
##  3  25.5     6 0     1    
##  4  90.7     5 0.8   0.2  
##  5  92.6     8 0.5   0    
##  6  87.8     9 0.667 0    
##  7  70.8     5 0     0.2  
##  8  56.2     8 0     0.25 
##  9 152.      5 0     0    
## 10  67.5     7 0.571 0.143

#Figure 4.1 on page 123
m1 <- lm(Yi~x1i+x2i,weights  =  ni)
leverage1 <- hatvalues(m1)
StanRes1 <- rstandard(m1)
absrtsr1 <- sqrt(abs(StanRes1))
residual1 <- m1$residuals
par(mfrow  =  c(2,3))
plot(x1i,Yi)
plot(x2i,Yi)
plot(x1i,x2i)
plot(m1$fitted.values,StanRes1, ylab = "Standardized Residuals",xlab = "Fitted Values")
abline(h  =  2,lty  =  2)
abline(h = -2,lty  =  2)
plot(m1$fitted.values,absrtsr1,ylab = "Square Root(|Standardized Residuals|)",xlab = "Fitted Values")
abline(lsfit(m1$fitted.values,absrtsr1),lty  =  2,col  =  2)

detach(Houston)

Straight Line Regression Based on Weighted Least Squares

A Student

2022-03-30

CHAPTER 4 OUTPUT