# Introduction

# NOTE: the age-only model (fit1) is intentionally not fitted here. At this
# point the haven package has not been attached and FEV has not been read from
# disk, so lm(fev ~ Age, data = FEV) would stop with "object 'FEV' not found"
# in a fresh session. The identical model, its summary, ANOVA table and
# diagnostic plots are produced in the univariate analysis section below, after
# the data are loaded.

# Load required packages
library(tidyverse)
## -- Attaching packages -------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.0     v purrr   0.3.4
## v tibble  3.0.1     v dplyr   0.8.5
## v tidyr   1.0.3     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ----------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(haven)
# Read data ----
# Directory that holds the Stata file. The full path is built with file.path()
# instead of calling setwd(), so the script does not change the session's
# working directory as a side effect.
data_dir <- "C:/Users/belac/Desktop/Assigments"

# read_dta() comes from haven; the result is stored as FEV and used by every
# model below.
FEV <- read_dta(file.path(data_dir, "FEV.dta"))

# Inspect the structure of the imported data (recorded output follows).
str(FEV)
## tibble [654 x 6] (S3: tbl_df/tbl/data.frame)
##  $ Id    : num [1:654] 301 451 501 642 901 ...
##   ..- attr(*, "format.stata")= chr "%12.0g"
##  $ Age   : num [1:654] 9 8 7 9 9 8 6 6 8 9 ...
##   ..- attr(*, "format.stata")= chr "%8.0g"
##  $ fev   : num [1:654] 1.71 1.72 1.72 1.56 1.9 ...
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ Height: num [1:654] 57 67.5 54.5 53 57 61 58 56 58.5 60 ...
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ Sex   : num [1:654] 0 0 0 1 1 0 0 0 0 0 ...
##   ..- attr(*, "format.stata")= chr "%8.0g"
##  $ Smoke : num [1:654] 0 0 0 0 0 0 0 0 0 0 ...
##   ..- attr(*, "format.stata")= chr "%8.0g"
# Univariate analysis ----

# Model 1: simple linear regression of fev on Age, using the FEV data.
fit1 <- lm(formula = fev ~ Age, data = FEV)

# Coefficient table and fit statistics (recorded output follows).
summary(fit1)
## 
## Call:
## lm(formula = fev ~ Age, data = FEV)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.57539 -0.34567 -0.04989  0.32124  2.12786 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.431648   0.077895   5.541 4.36e-08 ***
## Age         0.222041   0.007518  29.533  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5675 on 652 degrees of freedom
## Multiple R-squared:  0.5722, Adjusted R-squared:  0.5716 
## F-statistic: 872.2 on 1 and 652 DF,  p-value: < 2.2e-16

# Analysis-of-variance table, then the diagnostic plots for the fitted model.
anova(fit1)
plot(fit1)

# Model 2: simple linear regression of fev on Height, using the FEV data.
fit2 <- lm(formula = fev ~ Height, data = FEV)

# Coefficient table and fit statistics (recorded output follows).
summary(fit2)
## 
## Call:
## lm(formula = fev ~ Height, data = FEV)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.75167 -0.26619 -0.00401  0.24474  2.11936 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5.432679   0.181460  -29.94   <2e-16 ***
## Height       0.131976   0.002955   44.66   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4307 on 652 degrees of freedom
## Multiple R-squared:  0.7537, Adjusted R-squared:  0.7533 
## F-statistic:  1995 on 1 and 652 DF,  p-value: < 2.2e-16

# Analysis-of-variance table, then the diagnostic plots for the fitted model.
anova(fit2)
plot(fit2)

# Model 3: simple linear regression of fev on Sex, using the FEV data.
# Sex is a numeric column in FEV (see the str() output), so it enters the model
# as a single numeric predictor.
fit3 <- lm(formula = fev ~ Sex, data = FEV)

# Coefficient table and fit statistics (recorded output follows).
summary(fit3)
## 
## Call:
## lm(formula = fev ~ Sex, data = FEV)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.01645 -0.69420 -0.06367  0.58233  2.98055 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.45117    0.04759  51.505  < 2e-16 ***
## Sex          0.36128    0.06640   5.441  7.5e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8487 on 652 degrees of freedom
## Multiple R-squared:  0.04344,    Adjusted R-squared:  0.04197 
## F-statistic: 29.61 on 1 and 652 DF,  p-value: 7.496e-08

# Analysis-of-variance table, then the diagnostic plots for the fitted model.
anova(fit3)
plot(fit3)

# Model 4: simple linear regression of fev on Smoke, using the FEV data.
# Smoke is a numeric column in FEV (see the str() output), so it enters the
# model as a single numeric predictor.
fit4 <- lm(formula = fev ~ Smoke, data = FEV)

# Coefficient table and fit statistics (recorded output follows).
summary(fit4)
## 
## Call:
## lm(formula = fev ~ Smoke, data = FEV)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7751 -0.6339 -0.1021  0.4804  3.2269 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.56614    0.03466  74.037  < 2e-16 ***
## Smoke        0.71072    0.10994   6.464 1.99e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8412 on 652 degrees of freedom
## Multiple R-squared:  0.06023,    Adjusted R-squared:  0.05879 
## F-statistic: 41.79 on 1 and 652 DF,  p-value: 1.993e-10

# Analysis-of-variance table, then the diagnostic plots for the fitted model.
anova(fit4)
plot(fit4)

# Multiple analysis ----

# Multiple linear regression of fev on Age, Height, Sex and Smoke together.
fitm <- lm(formula = fev ~ Age + Height + Sex + Smoke, data = FEV)

# anova() on a single lm fit reports terms sequentially, in formula order.
anova(fitm)

# Coefficient table and fit statistics (recorded output follows).
summary(fitm)
## 
## Call:
## lm(formula = fev ~ Age + Height + Sex + Smoke, data = FEV)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.37656 -0.25033  0.00894  0.25588  1.92047 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.456974   0.222839 -20.001  < 2e-16 ***
## Age          0.065509   0.009489   6.904 1.21e-11 ***
## Height       0.104199   0.004758  21.901  < 2e-16 ***
## Sex          0.157103   0.033207   4.731 2.74e-06 ***
## Smoke       -0.087246   0.059254  -1.472    0.141    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4122 on 649 degrees of freedom
## Multiple R-squared:  0.7754, Adjusted R-squared:  0.774 
## F-statistic:   560 on 4 and 649 DF,  p-value: < 2.2e-16

# Diagnostic plots for the fitted model.
plot(fitm)