2-variable quantitative data

chapter 8

2-linear regression, R squared, and residuals

## enter the data: 2-variable quantitative data (car age and car price)
AgeCar <- c(1, 1, 3, 4, 4, 5, 5, 6, 7, 7, 8, 8, 10, 10, 13)
PriceCar <- c(
  13990, 13495, 12999, 9500, 10495, 8995, 9495, 6999,
  6950, 7850, 6999, 5995, 4950, 4495, 2850
)

## make the scatterplot (predictor on x, response on y)
plot(AgeCar, PriceCar, col = "purple", type = "p", pch = 16)

## calculate the linear regression model: PriceCar explained by AgeCar
lm.r <- lm(PriceCar ~ AgeCar)

## add the regression line to the scatterplot
abline(lm.r, col = "dark green")

## state the correlation coefficient
cor(AgeCar, PriceCar)
## [1] -0.9717677
## summary provides lots of information
summary(lm.r)
## 
## Call:
## lm(formula = PriceCar ~ AgeCar)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1532.67  -557.15    45.24   331.40  1590.19 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 14285.95     448.67   31.84 1.01e-13 ***
## AgeCar       -959.05      64.58  -14.85 1.56e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 816.2 on 13 degrees of freedom
## Multiple R-squared:  0.9443, Adjusted R-squared:  0.9401 
## F-statistic: 220.5 on 1 and 13 DF,  p-value: 1.562e-09
## look at the residuals
resid(lm.r)
##            1            2            3            4            5 
##   663.097663   168.097663  1590.189482  -949.764608    45.235392 
##            6            7            8            9           10 
##  -495.718698     4.281302 -1532.672788  -622.626878   277.373122 
##           11           12           13           14           15 
##   385.419032  -618.580968   254.510851  -200.489149  1031.648581
## residual plot: residuals are plotted against the predictor (AgeCar), not the
## response. Least-squares residuals are always positively correlated with the
## response when R-squared < 1, so plotting against PriceCar would show an
## upward trend even for a well-fitting line.
plot(AgeCar, resid(lm.r), col = "red", type = "p", pch = 16, main = "Residual Plot")
## zero reference line: points should scatter randomly around it
abline(h = 0, lty = 2)

Problem 49

## enter the data: 3-variable quantitative data (waist, weight, body fat)
WaistInches <- c(
  32, 36, 38, 33, 39, 40, 41, 35, 38, 38,
  33, 40, 36, 32, 44, 33, 41, 34, 34, 44
)
WeightLbs <- c(
  175, 181, 200, 159, 196, 192, 205, 173, 187, 188,
  188, 240, 175, 168, 246, 160, 215, 159, 146, 219
)
BodyFatPercent <- c(
  6, 21, 15, 6, 22, 31, 32, 21, 25, 30,
  10, 20, 22, 9, 38, 10, 27, 12, 10, 28
)


## make the scatterplot (predictor on x, response on y)
plot(WeightLbs, BodyFatPercent, col = "purple", type = "p", pch = 16)

## calculate the linear regression model: BodyFatPercent explained by WeightLbs
lm.r <- lm(BodyFatPercent ~ WeightLbs)

## add the regression line to the scatterplot
abline(lm.r, col = "dark green")

## state the correlation coefficient
cor(WeightLbs, BodyFatPercent)
## [1] 0.6966328
## summary provides lots of information
summary(lm.r)
## 
## Call:
## lm(formula = BodyFatPercent ~ WeightLbs)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.5935  -5.7904   0.6536   5.2731  10.4004 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -27.37626   11.54743  -2.371 0.029119 *  
## WeightLbs     0.24987    0.06065   4.120 0.000643 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.049 on 18 degrees of freedom
## Multiple R-squared:  0.4853, Adjusted R-squared:  0.4567 
## F-statistic: 16.97 on 1 and 18 DF,  p-value: 0.0006434
## look at the residuals
resid(lm.r)
##           1           2           3           4           5           6 
## -10.3517117   3.1490434  -7.5985652  -6.3537255   0.4009314  10.4004279 
##           7           8           9          10          11          12 
##   8.1520641   5.1480365   5.6497986  10.3999245  -9.6000755 -12.5935307 
##          13          14          15          16          17          18 
##   5.6482883  -5.6025928   3.9072245  -2.6035997   0.6533228  -0.3537255 
##          19          20 
##   0.8946383   0.6538262
## residual plot: residuals are plotted against the predictor (WeightLbs), not
## the response. Least-squares residuals are always positively correlated with
## the response when R-squared < 1, so plotting against BodyFatPercent would
## show an upward trend regardless of how well the line fits.
plot(WeightLbs, resid(lm.r), col = "red", type = "p", pch = 16, main = "Residual Plot")
## zero reference line: points should scatter randomly around it
abline(h = 0, lty = 2)

## Problem 50 data calc
## (uses the WaistInches and BodyFatPercent vectors entered for Problem 49)

## make the scatterplot (predictor on x, response on y)
plot(WaistInches, BodyFatPercent, col = "blue", type = "p", pch = 16)

## calculate the linear regression model: BodyFatPercent explained by WaistInches
lm.r2 <- lm(BodyFatPercent ~ WaistInches)

## add the regression line to the scatterplot
abline(lm.r2, col = "dark green")

## state the correlation coefficient
cor(WaistInches, BodyFatPercent)
## [1] 0.8868645
## summary provides lots of information
summary(lm.r2)
## 
## Call:
## lm(formula = BodyFatPercent ~ WaistInches)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.1896 -2.6421 -0.7528  3.5019  8.1396 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -62.5573    10.1577  -6.159 8.16e-06 ***
## WaistInches   2.2215     0.2728   8.144 1.90e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.54 on 18 degrees of freedom
## Multiple R-squared:  0.7865, Adjusted R-squared:  0.7747 
## F-statistic: 66.32 on 1 and 18 DF,  p-value: 1.9e-07
## look at the residuals
resid(lm.r2)
##          1          2          3          4          5          6 
## -2.5313233  3.5825961 -6.8604441 -4.7528435 -2.0819643  4.6965156 
##          7          8          9         10         11         12 
##  3.4749955  5.8041163  3.1395559  8.1395559 -0.7528435 -6.3034844 
##         13         14         15         16         17         18 
##  4.5825961  0.4686767  2.8104351 -0.7528435 -1.5250045 -0.9743636 
##         19         20 
## -2.9743636 -7.1895649
## residual plot: residuals are plotted against the predictor (WaistInches), not
## the response. Least-squares residuals are always positively correlated with
## the response when R-squared < 1, so plotting against BodyFatPercent would
## show an upward trend regardless of how well the line fits.
plot(WaistInches, resid(lm.r2), col = "Orange", type = "p", pch = 16, main = "Residual Plot")
## zero reference line: points should scatter randomly around it
abline(h = 0, lty = 2)

  1. No, the model isn't appropriate. This is because we can see a clear positive trend in the residual plot, meaning our model underestimates in the beginning and overestimates in the end.

  2. The slope is very small, at 0.24987, meaning Body Fat percent increases very slowly with each extra pound of weight.

  3. Our model wouldn’t make reliable estimates, since another model could predict the results better.

  4. Predicted body fat: -27.376 + 0.24987(190) = 20.099%. Residual: 21 - 20.099 = 0.901.

Problem 50

Yes, it appears that waist size in inches is a more accurate predictor of body fat percentage, since the residuals are completely random with no trend. This indicates our model fits the data well, with a strong positive trend in the scatterplot and regression line.