# Introduction
# Setup and first univariate model.
# Packages are attached and the data set is read before any model is fitted,
# so that `FEV` exists when lm() looks it up; `fit1` is fitted exactly once.

# load libraries
library(tidyverse)
## -- Attaching packages -------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.0 v purrr 0.3.4
## v tibble 3.0.1 v dplyr 0.8.5
## v tidyr 1.0.3 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ----------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(haven) # provides read_dta() for Stata .dta files

# define base directory
# NOTE(review): setwd() with an absolute, user-specific path makes the script
# non-portable. It is kept unchanged here because code outside this section may
# rely on the working directory; consider a project-relative path instead.
setwd("C:/Users/belac/Desktop/Assigments")

# Read data
FEV <- read_dta("FEV.dta")
str(FEV)
## tibble [654 x 6] (S3: tbl_df/tbl/data.frame)
## $ Id : num [1:654] 301 451 501 642 901 ...
## ..- attr(*, "format.stata")= chr "%12.0g"
## $ Age : num [1:654] 9 8 7 9 9 8 6 6 8 9 ...
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ fev : num [1:654] 1.71 1.72 1.72 1.56 1.9 ...
## ..- attr(*, "format.stata")= chr "%10.0g"
## $ Height: num [1:654] 57 67.5 54.5 53 57 61 58 56 58.5 60 ...
## ..- attr(*, "format.stata")= chr "%10.0g"
## $ Sex : num [1:654] 0 0 0 1 1 0 0 0 0 0 ...
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ Smoke : num [1:654] 0 0 0 0 0 0 0 0 0 0 ...
## ..- attr(*, "format.stata")= chr "%8.0g"

# univariate analysis
# Simple linear regression of fev on Age.
fit1 <- lm(fev ~ Age, data = FEV)
# Coefficient table and overall fit statistics (recorded output below).
summary(fit1)
##
## Call:
## lm(formula = fev ~ Age, data = FEV)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.57539 -0.34567 -0.04989 0.32124 2.12786
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.431648 0.077895 5.541 4.36e-08 ***
## Age 0.222041 0.007518 29.533 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5675 on 652 degrees of freedom
## Multiple R-squared: 0.5722, Adjusted R-squared: 0.5716
## F-statistic: 872.2 on 1 and 652 DF, p-value: < 2.2e-16
# Analysis-of-variance table for the fitted model.
anova(fit1)
# Regression diagnostic plots for the fitted model.
plot(fit1)
# Univariate model: fev regressed on Height alone.
fit2 <- lm(formula = fev ~ Height, data = FEV)
# Coefficient table and overall fit statistics (recorded output below).
summary(fit2)
##
## Call:
## lm(formula = fev ~ Height, data = FEV)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.75167 -0.26619 -0.00401 0.24474 2.11936
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.432679 0.181460 -29.94 <2e-16 ***
## Height 0.131976 0.002955 44.66 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4307 on 652 degrees of freedom
## Multiple R-squared: 0.7537, Adjusted R-squared: 0.7533
## F-statistic: 1995 on 1 and 652 DF, p-value: < 2.2e-16
# Analysis-of-variance table for the fitted model.
anova(fit2)
# Regression diagnostic plots for the fitted model.
plot(fit2)
# Univariate model: fev regressed on Sex alone (Sex enters as a numeric column).
fit3 <- lm(formula = fev ~ Sex, data = FEV)
# Coefficient table and overall fit statistics (recorded output below).
summary(fit3)
##
## Call:
## lm(formula = fev ~ Sex, data = FEV)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.01645 -0.69420 -0.06367 0.58233 2.98055
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.45117 0.04759 51.505 < 2e-16 ***
## Sex 0.36128 0.06640 5.441 7.5e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8487 on 652 degrees of freedom
## Multiple R-squared: 0.04344, Adjusted R-squared: 0.04197
## F-statistic: 29.61 on 1 and 652 DF, p-value: 7.496e-08
# Analysis-of-variance table for the fitted model.
anova(fit3)
# Regression diagnostic plots for the fitted model.
plot(fit3)
# Univariate model: fev regressed on Smoke alone (Smoke enters as a numeric
# column).
fit4 <- lm(formula = fev ~ Smoke, data = FEV)
# Coefficient table and overall fit statistics (recorded output below).
summary(fit4)
##
## Call:
## lm(formula = fev ~ Smoke, data = FEV)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7751 -0.6339 -0.1021 0.4804 3.2269
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.56614 0.03466 74.037 < 2e-16 ***
## Smoke 0.71072 0.10994 6.464 1.99e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8412 on 652 degrees of freedom
## Multiple R-squared: 0.06023, Adjusted R-squared: 0.05879
## F-statistic: 41.79 on 1 and 652 DF, p-value: 1.993e-10
# Analysis-of-variance table for the fitted model.
anova(fit4)
# Regression diagnostic plots for the fitted model.
plot(fit4)
# multiple analysis
# Multivariable model: fev regressed on Age, Height, Sex and Smoke together.
fitm <- lm(formula = fev ~ Age + Height + Sex + Smoke, data = FEV)
# Sequential analysis-of-variance table (terms enter in formula order).
anova(fitm)
# Coefficient table and overall fit statistics (recorded output below).
summary(fitm)
##
## Call:
## lm(formula = fev ~ Age + Height + Sex + Smoke, data = FEV)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.37656 -0.25033 0.00894 0.25588 1.92047
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.456974 0.222839 -20.001 < 2e-16 ***
## Age 0.065509 0.009489 6.904 1.21e-11 ***
## Height 0.104199 0.004758 21.901 < 2e-16 ***
## Sex 0.157103 0.033207 4.731 2.74e-06 ***
## Smoke -0.087246 0.059254 -1.472 0.141
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4122 on 649 degrees of freedom
## Multiple R-squared: 0.7754, Adjusted R-squared: 0.774
## F-statistic: 560 on 4 and 649 DF, p-value: < 2.2e-16
# Regression diagnostic plots for the fitted model.
plot(fitm)