# load ggplot2 for plotting
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.5
# print numbers with 3 significant digits (not a fixed number of decimals)
options(digits = 3)
# age and maximum heart rate observations (15 values each, paired by position)
age <- c(18, 23, 25, 35, 65, 54, 34, 56, 72, 19, 23, 42, 18, 39, 37)
mhr <- c(
  202, 186, 187, 180, 156, 169, 174, 172, 153, 199, 193, 174, 198, 183, 178
)
# simple linear regression of max heart rate on age
mhr.lm <- lm(formula = mhr ~ age)
# show residual quantiles, the coefficient table, and the fit statistics
summary(object = mhr.lm)
##
## Call:
## lm(formula = mhr ~ age)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.926 -2.538 0.388 3.187 6.624
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 210.048 2.867 73.3 < 2e-16 ***
## age -0.798 0.070 -11.4 3.8e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.58 on 13 degrees of freedom
## Multiple R-squared: 0.909, Adjusted R-squared: 0.902
## F-statistic: 130 on 1 and 13 DF, p-value: 3.85e-08
\(\hat{MaxHR} = 210.048 - 0.798*age\)
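The effect of age on max heart rate is significant. The p-value for the age coefficient (3.8e-08) is less than 0.001; that is, if age truly had no effect, a slope this extreme would be observed with probability below 0.001. We therefore reject the null hypothesis that age has no effect on max heart rate.
The significance level is 0.001.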
# combine the two vectors into a data frame, which ggplot() takes as its data
mhr.df <- data.frame(age, mhr)
# scatter plot of the observations with the fitted least-squares line on top;
# points are used instead of geom_line() because several ages repeat
# (e.g. 18 and 23), so joining the observations would draw a zig-zag path
ggplot(mhr.df, aes(x = age, y = mhr)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Age Vs Heart Rate")
Using the Auto data set, perform a linear regression analysis with mpg as the dependent variable and the other four variables (displacement, horsepower, weight, acceleration) as independent variables.
# read auto data
# read.table() defaults apply: whitespace-separated fields, no header row
auto <- read.table("auto-mpg.data")
# assign column names
colnames(auto) <- c(
  "displacement", "horsepower", "weight", "acceleration", "mpg"
)
# draw 40 distinct rows at random; the row count comes from the data itself
# rather than a hard-coded 392, so the sampling frame always matches the file.
# NOTE: no seed is set, so a different sample is drawn on every run; call
# set.seed() before this line if the results must be reproducible.
auto40 <- auto[sample(seq_len(nrow(auto)), 40, replace = FALSE), ]
# confirm that the sample holds 40 rows
nrow(auto40)
## [1] 40
# show the first six sampled rows as a quick sanity check
head(auto40, n = 6L)
## displacement horsepower weight acceleration mpg
## 301 105 70 2150 14.9 34.5
## 143 76 52 1649 16.5 31.0
## 47 250 100 3282 15.0 19.0
## 188 304 120 3962 13.9 15.5
## 324 90 48 2085 21.7 44.3
## 177 120 88 2957 17.0 23.0
# multiple regression of mpg on the four predictors, fitted to the 40-row sample
mpg40.lm <- lm(
  formula = mpg ~ displacement + horsepower + weight + acceleration,
  data = auto40
)
# store the summary object so its coefficient table can be reused, then show it
autosum40 <- summary(object = mpg40.lm)
autosum40
##
## Call:
## lm(formula = mpg ~ displacement + horsepower + weight + acceleration,
## data = auto40)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.292 -3.687 0.296 3.315 12.727
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.74059 8.85691 5.50 3.5e-06 ***
## displacement -0.01079 0.02817 -0.38 0.704
## horsepower -0.03232 0.07185 -0.45 0.656
## weight -0.00563 0.00309 -1.82 0.077 .
## acceleration -0.13376 0.50341 -0.27 0.792
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.48 on 35 degrees of freedom
## Multiple R-squared: 0.738, Adjusted R-squared: 0.708
## F-statistic: 24.6 on 4 and 35 DF, p-value: 9.26e-10
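\(\hat{mpg} = 48.741 - 0.011 * displacement - 0.032 * horsepower - 0.006 * weight - 0.134 * acceleration\)
Weight is the only predictor that approaches significance, and only at the 0.1 level (p = 0.077); it is not significant at the 0.05 or 0.01 level, and none of the other predictors is significant in this 40-row sample.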
# pull the standard-error column out of the coefficient table by name
coef(autosum40)[, "Std. Error"]
## (Intercept) displacement horsepower weight acceleration
## 8.85691 0.02817 0.07185 0.00309 0.50341
# 95% confidence interval for every coefficient of the sample model
confint(object = mpg40.lm, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 30.7601 6.67e+01
## displacement -0.0680 4.64e-02
## horsepower -0.1782 1.14e-01
## weight -0.0119 6.39e-04
## acceleration -1.1557 8.88e-01
Using the entire Auto data set, perform a linear regression analysis with mpg as the dependent variable and the other four variables (displacement, horsepower, weight, acceleration) as independent variables.
# Linear model for the entire auto data
# multiple regression of mpg on the four predictors, fitted to all rows of auto
mpg.lm <- lm(
  formula = mpg ~ displacement + horsepower + weight + acceleration,
  data = auto
)
# show residual quantiles, the coefficient table, and the fit statistics
summary(object = mpg.lm)
##
## Call:
## lm(formula = mpg ~ displacement + horsepower + weight + acceleration,
## data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.378 -2.793 -0.333 2.193 16.256
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 45.251140 2.456045 18.42 < 2e-16 ***
## displacement -0.006001 0.006709 -0.89 0.3717
## horsepower -0.043608 0.016573 -2.63 0.0088 **
## weight -0.005281 0.000811 -6.51 2.3e-10 ***
## acceleration -0.023148 0.125601 -0.18 0.8539
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.25 on 387 degrees of freedom
## Multiple R-squared: 0.707, Adjusted R-squared: 0.704
## F-statistic: 233 on 4 and 387 DF, p-value: <2e-16
\(\hat{mpg} = 45.251 - 0.006 * displacement - 0.044 * horsepower - 0.005 * weight - 0.023 * acceleration\)
Horsepower and weight have a significant impact on mpg at the 0.01 and 0.001 significance levels, respectively.
# pull the standard-error column out of the full model's coefficient table
coef(summary(mpg.lm))[, "Std. Error"]
## (Intercept) displacement horsepower weight acceleration
## 2.456045 0.006709 0.016573 0.000811 0.125601
# 95% confidence interval for every coefficient of the full-data model
confint(object = mpg.lm, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 40.42228 50.08000
## displacement -0.01919 0.00719
## horsepower -0.07619 -0.01102
## weight -0.00687 -0.00369
## acceleration -0.27009 0.22380