# Load the Kids198 data frame from the Stat2Data package

# Attach Stat2Data, then load the Kids198 data set into the workspace.
library(Stat2Data)
data(list = "Kids198")

# Loading the plotting and modelling libraries

library(ggplot2)
library(HH)
## Loading required package: lattice
## Loading required package: grid
## Loading required package: latticeExtra
## Loading required package: RColorBrewer
## 
## Attaching package: 'latticeExtra'
## The following object is masked from 'package:ggplot2':
## 
##     layer
## Loading required package: multcomp
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
## 
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
## 
##     geyser
## Loading required package: gridExtra

# A quick look at the data: the first rows and a scatterplot of Weight against Age

# Preview the first six rows, then plot Weight against Age. The 0/1 Sex code
# is used directly as the plotting symbol (pch 0 is a square, pch 1 a circle).
head(Kids198)
##   Height Weight Age Sex Race
## 1   67.8    166 210   0    1
## 2   63.0     93 144   1    0
## 3   50.1     54 119   0    0
## 4   55.7     69 130   1    0
## 5   63.2    115 157   0    0
## 6   48.8     52 102   0    0
plot(Weight ~ Age, data = Kids198, pch = Sex)

# Subsetting the data frame by sex (Sex == 0 for males, Sex == 1 for females)

# Split the children by the Sex code: rows with 0 go to `male`, rows with 1
# go to `female`.
male <- subset(Kids198, subset = Sex == 0)
female <- subset(Kids198, subset = Sex == 1)

# Fitting a simple linear regression of Weight on Age for the females

# Simple linear regression of Weight on Age using only the female rows.
# The fit is then reported three ways: coefficient summary, ANOVA table,
# and HH's ci.plot() of the fitted line.
regmodelf <- lm(Weight ~ Age, data = female)
summary(regmodelf)
## 
## Call:
## lm(formula = Weight ~ Age, data = female)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.381 -11.849  -4.341  10.351  58.581 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.84197    7.90251  -0.233    0.816    
## Age          0.62749    0.04937  12.710   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.48 on 100 degrees of freedom
## Multiple R-squared:  0.6176, Adjusted R-squared:  0.6138 
## F-statistic: 161.5 on 1 and 100 DF,  p-value: < 2.2e-16
anova(regmodelf)
## Analysis of Variance Table
## 
## Response: Weight
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Age         1  49365   49365  161.53 < 2.2e-16 ***
## Residuals 100  30560     306                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ci.plot(regmodelf)

# Fitting a simple linear regression of Weight on Age for the males

# Simple linear regression of Weight on Age using only the male rows.
# The fit is then reported three ways: coefficient summary, ANOVA table,
# and HH's ci.plot() of the fitted line.
regmodelm <- lm(Weight ~ Age, data = male)
summary(regmodelm)
## 
## Call:
## lm(formula = Weight ~ Age, data = male)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -46.884 -12.918  -1.792  10.148  47.373 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -33.69254   10.87366  -3.099  0.00257 ** 
## Age           0.90871    0.06635  13.696  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.85 on 94 degrees of freedom
## Multiple R-squared:  0.6662, Adjusted R-squared:  0.6626 
## F-statistic: 187.6 on 1 and 94 DF,  p-value: < 2.2e-16
anova(regmodelm)
## Analysis of Variance Table
## 
## Response: Weight
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## Age        1  81525   81525  187.58 < 2.2e-16 ***
## Residuals 94  40853     435                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ci.plot(regmodelm)

# Creating a scatterplot with a different symbol for each sex and the two fitted regression lines

# Scatterplot of Weight against Age for all children, with the Sex code
# (0 = male, 1 = female) choosing the plotting symbol, overlaid with the
# separately fitted male (blue) and female (pink) regression lines.
plot(Weight ~ Age, pch = Sex, data = Kids198)
abline(regmodelm, col = "blue")
abline(regmodelf, col = "pink")
# Without a key the reader cannot tell which symbol or line belongs to which
# sex. The first two entries describe the point symbols (drawn in black), the
# last two the fitted lines; NA suppresses the symbol or line that does not
# apply to an entry.
legend(
  "topleft",
  legend = c("Male (Sex = 0)", "Female (Sex = 1)", "Male fit", "Female fit"),
  pch = c(0, 1, NA, NA),
  lty = c(NA, NA, 1, 1),
  col = c("black", "black", "blue", "pink"),
  bty = "n"
)

# A multiple regression model for both males and females, with an Age-by-Sex interaction so that each sex has its own intercept and slope

# Single model for all children with an Age-by-Sex interaction.
# `Sex * Age` already expands to Sex + Age + Age:Sex, so the formula is
# equivalent to Weight ~ Age * Sex; it is kept as written so the echoed
# `Call:` below still matches.
# Sex == 0 (male) is the baseline, so the (Intercept) and Age estimates
# reproduce the male-only fit, and the Sex and Age:Sex terms are the female
# shifts in intercept and slope.
regmodel <- lm(Weight ~ Age + Sex + Sex * Age, data = Kids198)
summary(regmodel)
## 
## Call:
## lm(formula = Weight ~ Age + Sex + Sex * Age, data = Kids198)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -46.884 -12.055  -2.782  10.185  58.581 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -33.69254   10.00727  -3.367 0.000917 ***
## Age           0.90871    0.06106  14.882  < 2e-16 ***
## Sex          31.85057   13.24269   2.405 0.017106 *  
## Age:Sex      -0.28122    0.08164  -3.445 0.000700 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.19 on 194 degrees of freedom
## Multiple R-squared:  0.6683, Adjusted R-squared:  0.6631 
## F-statistic: 130.3 on 3 and 194 DF,  p-value: < 2.2e-16
anova(regmodel)
## Analysis of Variance Table
## 
## Response: Weight
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Age         1 131450  131450 357.092 < 2.2e-16 ***
## Sex         1   8046    8046  21.858 5.488e-06 ***
## Age:Sex     1   4368    4368  11.866 0.0007004 ***
## Residuals 194  71414     368                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 95% confidence intervals (the confint() default) for the four coefficients.
confint(regmodel)
##                   2.5 %      97.5 %
## (Intercept) -53.4295458 -13.9555301
## Age           0.7882792   1.0291404
## Sex           5.7324387  57.9686922
## Age:Sex      -0.4422327  -0.1202112

# A multiple regression model with a common slope for both sexes (parallel lines model)

# Parallel-lines model: one Age slope shared by both sexes, with Sex only
# shifting the intercept.
parallel <- lm(Weight ~ Age + Sex, data = Kids198)
summary(parallel)
## 
## Call:
## lm(formula = Weight ~ Age + Sex, data = Kids198)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -47.311 -12.577  -1.933  10.024  58.727 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -8.40655    6.98870  -1.203     0.23    
## Age           0.75138    0.04164  18.043  < 2e-16 ***
## Sex         -12.78433    2.80963  -4.550 9.41e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.71 on 195 degrees of freedom
## Multiple R-squared:  0.648,  Adjusted R-squared:  0.6444 
## F-statistic: 179.5 on 2 and 195 DF,  p-value: < 2.2e-16
anova(parallel)
## Analysis of Variance Table
## 
## Response: Weight
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Age         1 131450  131450 338.243 < 2.2e-16 ***
## Sex         1   8046    8046  20.704 9.412e-06 ***
## Residuals 195  75782     389                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Nested F test of the parallel-lines model against the interaction model:
# the single extra degree of freedom is the Age:Sex term, so this tests
# whether the two slopes differ.
anova(parallel, regmodel)
## Analysis of Variance Table
## 
## Model 1: Weight ~ Age + Sex
## Model 2: Weight ~ Age + Sex + Sex * Age
##   Res.Df   RSS Df Sum of Sq      F    Pr(>F)    
## 1    195 75782                                  
## 2    194 71414  1    4368.2 11.866 0.0007004 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Interesting plots and scatterplot smoothing with ggplot2

# Scatterplots of Weight against Age built with ggplot() layers; qplot() is
# deprecated as of ggplot2 3.4.0.

# Colour the points by sex. Sex is stored as a 0/1 number, so it is converted
# to a factor to get two discrete colours rather than a continuous gradient.
ggplot(Kids198, aes(x = Age, y = Weight, colour = factor(Sex))) +
  geom_point() +
  labs(colour = "Sex")

# One panel per sex.
ggplot(Kids198, aes(x = Age, y = Weight)) +
  geom_point() +
  facet_wrap(~Sex)

# One panel per sex, with a smoother drawn over the points. The method is left
# at geom_smooth()'s default, which is why it reports the method it picked.
ggplot(Kids198, aes(x = Age, y = Weight)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~Sex)
## `geom_smooth()` using method = 'loess'