Data of height and weight

Suppose data of height and weight are collected from a sample of 10 Student.

##         height   weight
##  [1,] 1.562347 50.40805
##  [2,] 1.587262 54.36766
##  [3,] 1.596453 50.55602
##  [4,] 1.639889 55.93452
##  [5,] 1.693347 60.56236
##  [6,] 1.695561 45.31824
##  [7,] 1.715678 58.01562
##  [8,] 1.730744 56.43253
##  [9,] 1.739897 58.13044
## [10,] 1.779681 68.61954

Scatter Plot: X vs. Y

plot(x[order(x)],y[order(x)],xlab="Height",ylab="Weight",cex=2)

Fit a step function

plot(x[order(x)],y[order(x)],xlab="Height",ylab="Weight",type="s",cex=2,col="blue")
points(x[order(x)],y[order(x)],cex=2)

Fit a curve function

hwdata=as.data.frame(cbind(height=x[order(x)],weight=y[order(x)]))
nlmhw <- nls(weight ~ a+b*height+c*height^2,
    data = hwdata, start = list(a = 1,b=1,c=1))
  
ab=summary(nlmhw)$coef[,1]
plot(x[order(x)],y[order(x)],xlab="height",ylab="weight",cex=2)
curve(ab[1]+ab[2]*x++ab[3]*x^2,1.5,1.8,add=TRUE,lwd=2,col="red")

Linear Model

plot(x[order(x)],y[order(x)],xlab="Height",ylab="Weight",cex=2)
abline(lm(y[order(x)]~x[order(x)]),lwd=2)

Model Fitting

How to fit a linear regression?

Least squares criterion:

# plot scatterplot and the regression line
mod1 <- lm(y ~ x)
plot(x, y,xlab="Height",ylab="Weight",cex=2)
abline(mod1, lwd=2)

# calculate residuals and predicted values
res <- signif(residuals(mod1), 5)
pre <- predict(mod1)

# plot distances between points and the regression line
segments(x, y, x, pre, col="red")

summary(mod1)
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -11.682  -1.223   0.281   2.923   7.054 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   -35.03      40.22  -0.871   0.4092  
## x              54.28      24.00   2.261   0.0536 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.285 on 8 degrees of freedom
## Multiple R-squared:  0.3899, Adjusted R-squared:  0.3136 
## F-statistic: 5.113 on 1 and 8 DF,  p-value: 0.05363

Compare with the true model

plot(x[order(x)],y[order(x)],xlab="Height",ylab="Weight",cex=2)
abline(lm(y[order(x)]~x[order(x)]),lwd=2)
curve(21*x^2,1.5,1.8,add=TRUE,lwd=2,col="red")
legend("topleft", c(expression(y == "-35.03+54.28x"),expression(y == paste(21,x^2))),lty=1,col=1:2,lwd=2)