# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ~ CRP 241 Module 4 Day 7   ~
# ~ Simple Linear Regression ~
# ~      How is it fit?      ~
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# Body Fat Data:
# Percentage of body fat, age, weight, height, and ten body circumference 
# measurements (e.g., abdomen, neck, ankle, etc.) are recorded for 252 men. 
# Body fat is estimated through an underwater weighing technique. 

# There are numerous variables in the lead dataset but those of interest for 
# the current exercise include:
# 1.  brobf  : % Body Fat
# 2.  abdmn  : Abdomen Circumference (cm)
# 3.  neck   : Neck Circumference (cm)
# 4.  ankle  : Ankel Circumference (cm)
# 5.  height : Height (in)

## Download and load the data file = bpdata
download.file("http://www.duke.edu/~sgrambow/crp241data/bodyfat.RData",
              destfile = "bodyfat.RData",quiet=TRUE,mode="wb",cacheOK=FALSE)
load("bodyfat.RData")
# How is the simple linear regression model really fit?
fit <- lm(brobf ~ abdmn,data=bodyfat)
summary(fit)
## 
## Call:
## lm(formula = brobf ~ abdmn, data = bodyfat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.6257  -3.4672   0.0111   3.1415  11.9754 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -35.19661    2.46229  -14.29   <2e-16 ***
## abdmn         0.58489    0.02643   22.13   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.514 on 250 degrees of freedom
## Multiple R-squared:  0.6621, Adjusted R-squared:  0.6608 
## F-statistic: 489.9 on 1 and 250 DF,  p-value: < 2.2e-16
# - Create scatter plot of X vs. Y
plot(x=bodyfat$abdmn,y=bodyfat$brobf,cex=1.5,pch=19,col='blue')
# - Add the fitted regression line (black line)
abline(fit,lwd=2)
# - Add lines to represent residuals for two data points 
#   (red lines and red stars)
beta0 <- fit$coefficients[1]
beta1 <- fit$coefficients[2]
# highlight point (104.5, 16.9)
points(104.5,(16.9),pch=1,col='red',cex=3)
points(104.5,(beta0+(beta1*104.5)),pch='*',col='red',cex=3)
segments(104.5,16.9,104.5,(beta0+(beta1*104.5)),col='red',lty=2)

# highlight point (122.1,45.1)
points(122.1,(45.1),pch=1,col='red',cex=3)
points(122.1,(beta0+(beta1*122.1)),pch='*',col='red',cex=3)
segments(122.1,45.1,122.1,(beta0+(beta1*122.1)),col='red',lty=2)

# End Program