# Ages of the 15 subjects.
Age <- c(18, 23, 25, 35, 65, 54, 34, 56, 72, 19, 23, 42, 18, 39, 37)
Age
## [1] 18 23 25 35 65 54 34 56 72 19 23 42 18 39 37
# Maximum heart rate of each subject, listed in the same order as Age.
MaxHR <- c(202, 186, 187, 180, 156, 169, 174, 172, 153, 199, 193, 174, 198, 183, 178)
MaxHR
## [1] 202 186 187 180 156 169 174 172 153 199 193 174 198 183 178
# data.frame() silently recycles a shorter vector when the lengths divide
# evenly, so make sure the two measurements really are paired one-to-one.
stopifnot(length(Age) == length(MaxHR))
Age.MaxHR.df <- data.frame(Age = Age, MaxHeartRate = MaxHR)
# Simple linear regression of maximum heart rate on age.
# The response is referenced by its column name (MaxHeartRate) so that every
# variable in the formula is resolved inside `data`; writing `MaxHR ~ Age`
# only worked because lm() fell back to the global MaxHR vector.
summary(lm(MaxHeartRate ~ Age, data = Age.MaxHR.df))
##
## Call:
## lm(formula = MaxHeartRate ~ Age, data = Age.MaxHR.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.9258 -2.5383 0.3879 3.1867 6.6242
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 210.04846 2.86694 73.27 < 2e-16 ***
## Age -0.79773 0.06996 -11.40 3.85e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.578 on 13 degrees of freedom
## Multiple R-squared: 0.9091, Adjusted R-squared: 0.9021
## F-statistic: 130 on 1 and 13 DF, p-value: 3.848e-08
library(ggplot2)
# Scatter plot of maximum heart rate against age, with points shaded by heart
# rate and a fitted straight-line (lm) trend drawn on top.
ggplot(data = Age.MaxHR.df, mapping = aes(x = Age, y = MaxHeartRate)) +
  geom_point(mapping = aes(colour = MaxHeartRate)) +
  geom_smooth(method = "lm") +
  coord_cartesian() +
  scale_colour_gradient() +
  labs(
    title = "Max Heart Rate to Age",
    x = "Age",
    y = "Max Heart Rate"
  ) +
  theme_bw()
# Fit and keep the heart-rate model so it can be inspected and plotted.
# The response is the MaxHeartRate column of the data frame; `MaxHR ~ Age`
# would resolve MaxHR from the global environment instead of from `data`.
lm.r <- lm(MaxHeartRate ~ Age, data = Age.MaxHR.df)
# Split the graphics device into a 2 x 2 grid so that the diagnostic plots
# produced by plot() for an lm object are drawn together on one page.
layout(matrix(1:4, nrow = 2, ncol = 2))
plot(lm.r)
##
## Call:
## lm(formula = MaxHeartRate ~ Age, data = Age.MaxHR.df)
##
## Coefficients:
## (Intercept) Age
## 210.0485 -0.7977
## [1] "coefficients" "residuals" "effects" "rank"
## [5] "fitted.values" "assign" "qr" "df.residual"
## [9] "xlevels" "call" "terms" "model"
##
## Call:
## lm(formula = MaxHeartRate ~ Age, data = Age.MaxHR.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.9258 -2.5383 0.3879 3.1867 6.6242
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 210.04846 2.86694 73.27 < 2e-16 ***
## Age -0.79773 0.06996 -11.40 3.85e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.578 on 13 degrees of freedom
## Multiple R-squared: 0.9091, Adjusted R-squared: 0.9021
## F-statistic: 130 on 1 and 13 DF, p-value: 3.848e-08
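\[ \mathrm{MaxHR} = 210.04846 - 0.79773 \times \mathrm{Age} \]
\[ \text{(Intercept): estimate } 210.04846,\ \text{std. error } 2.86694,\ t = 73.27,\ p < 2 \times 10^{-16}\ \text{(***)} \] \[ \text{Age: estimate } -0.79773,\ \text{std. error } 0.06996,\ t = -11.40,\ p = 3.85 \times 10^{-8}\ \text{(***)} \]
\[ \text{F-statistic: } 130 \text{ on } 1 \text{ and } 13 \text{ DF},\ p\text{-value: } 3.848 \times 10^{-8} \]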
# Load the auto data: a plain numeric file read as one long vector in which
# every consecutive run of five values forms one record.
datapath <- "C:/CUNY/Courses/IS605/Assignments/Assignment11/auto-mpg.data"
autompg <- scan(datapath)
#str(autompg)
# matrix() would recycle values (with only a warning) if the file were
# truncated or had a malformed record, silently shifting every later field.
# Fail loudly instead.
if (length(autompg) == 0 || length(autompg) %% 5 != 0) {
  stop(
    "Expected a non-empty file of complete 5-value records, but read ",
    length(autompg), " values from ", datapath,
    call. = FALSE
  )
}
# One row per record, one column per field.
autompg.mtrx <- matrix(autompg, ncol = 5, byrow = TRUE)
#head(autompg.mtrx)
# Perform a linear regression analysis using mpg as the dependent variable and
# the other 4 (displacement, horse-power, weight, acceleration) as independent
# variables.
# What is the final linear regression fit equation?

# Name the five matrix columns; mpg is the last field of each record.
autompg.df <- data.frame(
  displacement = autompg.mtrx[, 1],
  horse_power  = autompg.mtrx[, 2],
  weight       = autompg.mtrx[, 3],
  acceleration = autompg.mtrx[, 4],
  mpg          = autompg.mtrx[, 5]
)
# Fit the model once and derive the summary from the stored fit, so the model
# used for confidence intervals and the one being summarised cannot drift
# apart (the formula was previously written out and fitted twice).
autompg.df.lm <- lm(
  mpg ~ displacement + horse_power + weight + acceleration,
  data = autompg.df
)
full_summary <- summary(autompg.df.lm)
names(full_summary)
## [1] "call" "terms" "residuals" "coefficients"
## [5] "aliased" "sigma" "df" "r.squared"
## [9] "adj.r.squared" "fstatistic" "cov.unscaled"
# The "Estimate" column of the coefficient table gives the terms of the final
# fitted regression equation.
coef(full_summary)[, "Estimate"]
## (Intercept) displacement horse_power weight acceleration
## 45.251139699 -0.006000871 -0.043607731 -0.005280508 -0.023147999
\[ \mathrm{mpg} = 45.251139699 - 0.006000871 \times \mathrm{displacement} - 0.043607731 \times \mathrm{horse\_power} - 0.005280508 \times \mathrm{weight} - 0.023147999 \times \mathrm{acceleration} \]
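From the summary below, we see that horsepower and weight have a statistically significant effect on mpg: their t-tests are significant at the 0.01 and 0.001 levels respectively (p = 0.0088 and p = 2.3e-10), whereas displacement and acceleration are not significant.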
# Full coefficient table: estimate, standard error, t value and p-value.
coef(full_summary)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 45.251139699 2.4560446927 18.4243959 7.072099e-55
## displacement -0.006000871 0.0067093055 -0.8944102 3.716584e-01
## horse_power -0.043607731 0.0165734633 -2.6311779 8.848982e-03
## weight -0.005280508 0.0008108541 -6.5122789 2.302545e-10
## acceleration -0.023147999 0.1256011622 -0.1842977 8.538765e-01
# What are the standard errors on each of the coefficients?
# Column 2 of the coefficient table is "Std. Error"; it is wrapped in a data
# frame so the values print one per row. data.frame() builds the column name
# from the argument expression, which is why the printed header reads
# full_summary.coefficients...2.
Std.Error<- data.frame(full_summary$coefficients[,2])
Std.Error
## full_summary.coefficients...2.
## (Intercept) 2.4560446927
## displacement 0.0067093055
## horse_power 0.0165734633
## weight 0.0008108541
## acceleration 0.1256011622
# The corresponding significance levels for the entire data set.
# Column 4 of the coefficient table is "Pr(>|t|)", the p-value of each
# coefficient's t-test. As above, the data-frame column name is derived from
# the argument expression.
Significant.levels<- data.frame(full_summary$coefficients[,4])
Significant.levels
## full_summary.coefficients...4.
## (Intercept) 7.072099e-55
## displacement 3.716584e-01
## horse_power 8.848982e-03
## weight 2.302545e-10
## acceleration 8.538765e-01
# 95% confidence interval for each coefficient of the model fitted to the
# entire data set.
confint(object = autompg.df.lm, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 40.422278855 50.080000544
## displacement -0.019192122 0.007190380
## horse_power -0.076193029 -0.011022433
## weight -0.006874738 -0.003686277
## acceleration -0.270094049 0.223798050
# First take any random 40 data points from the entire auto data sample,
# perform the linear regression fit and measure the 95% confidence intervals.
# Then take the entire data set (all 392 points), perform linear regression
# and measure the 95% confidence intervals.
# Please report the resulting fit equation, the significance values and the
# confidence intervals for each of the two runs.

# Fix the RNG seed so the same 40 rows are drawn on every run.
set.seed(40)
# Draw 40 distinct row indices. seq_len(nrow(.)) replaces 1:length(column)
# (which counts backwards on an empty table) and FALSE replaces the
# reassignable shorthand F; the rows drawn are unchanged.
autompg.df.Sample <- autompg.df[
  sample(seq_len(nrow(autompg.df)), size = 40, replace = FALSE),
]
# Same model as for the full data, fitted to the 40-row sample only.
autompg.df.Sample.lm <- lm(
  mpg ~ displacement + horse_power + weight + acceleration,
  data = autompg.df.Sample
)
sample40_summary <- summary(autompg.df.Sample.lm)
sample40_summary
##
## Call:
## lm(formula = mpg ~ displacement + horse_power + weight + acceleration,
## data = autompg.df.Sample)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.5505 -3.1200 0.0593 2.2089 11.6046
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 49.071242 8.809153 5.570 2.84e-06 ***
## displacement 0.035539 0.023938 1.485 0.1466
## horse_power -0.037001 0.058676 -0.631 0.5324
## weight -0.009695 0.003873 -2.503 0.0171 *
## acceleration 0.022843 0.446559 0.051 0.9595
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.558 on 35 degrees of freedom
## Multiple R-squared: 0.6768, Adjusted R-squared: 0.6399
## F-statistic: 18.32 on 4 and 35 DF, p-value: 3.343e-08
\[ \mathrm{mpg} = 49.071242127 + 0.035538507 \times \mathrm{displacement} - 0.037001317 \times \mathrm{horse\_power} - 0.009694914 \times \mathrm{weight} + 0.022843319 \times \mathrm{acceleration} \]
# Coefficient estimates (column 1 of the coefficient table) of the 40-point
# sample fit, printed one per row. The data-frame column name is derived by
# data.frame() from the argument expression.
data.frame(sample40_summary$coefficients[,1])
## sample40_summary.coefficients...1.
## (Intercept) 49.071242127
## displacement 0.035538507
## horse_power -0.037001317
## weight -0.009694914
## acceleration 0.022843319
# The corresponding significance levels for the 40-point sample.
# Column 4 of the coefficient table is "Pr(>|t|)", the p-value of each
# coefficient's t-test.
Significant.levels.sample<- data.frame(sample40_summary$coefficients[,4])
Significant.levels.sample
## sample40_summary.coefficients...4.
## (Intercept) 2.844906e-06
## displacement 1.465941e-01
## horse_power 5.323991e-01
## weight 1.711424e-02
## acceleration 9.594935e-01
# 95% confidence interval for each coefficient of the model fitted to the
# 40-point sample.
confint(object = autompg.df.Sample.lm, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 31.18771089 66.954773366
## displacement -0.01305754 0.084134556
## horse_power -0.15612090 0.082118270
## weight -0.01755675 -0.001833079
## acceleration -0.88371894 0.929405577
#
From the summary below of the entire data set, we see that horsepower and weight have a statistically significant effect on mpg: their t-tests are significant at the 0.01 and 0.001 levels respectively (p = 0.0088 and p = 2.3e-10).
# Show the regression summary for the model fitted to the entire data set.
print(full_summary)
##
## Call:
## lm(formula = mpg ~ displacement + horse_power + weight + acceleration,
## data = autompg.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.378 -2.793 -0.333 2.193 16.256
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 45.2511397 2.4560447 18.424 < 2e-16 ***
## displacement -0.0060009 0.0067093 -0.894 0.37166
## horse_power -0.0436077 0.0165735 -2.631 0.00885 **
## weight -0.0052805 0.0008109 -6.512 2.3e-10 ***
## acceleration -0.0231480 0.1256012 -0.184 0.85388
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.247 on 387 degrees of freedom
## Multiple R-squared: 0.707, Adjusted R-squared: 0.704
## F-statistic: 233.4 on 4 and 387 DF, p-value: < 2.2e-16
However, in the summary of the 40-point sample below, only weight is statistically significant, and only at the 0.05 level (p = 0.017).
# Show the regression summary for the model fitted to the 40-point sample.
print(sample40_summary)
##
## Call:
## lm(formula = mpg ~ displacement + horse_power + weight + acceleration,
## data = autompg.df.Sample)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.5505 -3.1200 0.0593 2.2089 11.6046
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 49.071242 8.809153 5.570 2.84e-06 ***
## displacement 0.035539 0.023938 1.485 0.1466
## horse_power -0.037001 0.058676 -0.631 0.5324
## weight -0.009695 0.003873 -2.503 0.0171 *
## acceleration 0.022843 0.446559 0.051 0.9595
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.558 on 35 degrees of freedom
## Multiple R-squared: 0.6768, Adjusted R-squared: 0.6399
## F-statistic: 18.32 on 4 and 35 DF, p-value: 3.343e-08