Read in the Data and libaries we will need

library(MASS)

child = c(1:27)
age = c(2,3,2,4,2,3,4,3,4,4,3,3,2,2,2,4,4,3,3,3,2,2,2,3,4,4,2)
sex = c(1,2,2,2,2,1,1,1,2,1,2,1,1,2,1,1,2,2,1,2,2,1,2,2,1,2,2) 
time = c(12,17,10,24,13,19,22,20,31,32,23,16,8,14,9,39,37,11,14,22,11,7,11,16,13,49,15)

## turn into data frame 
dat = data.frame(child,age,sex,time)
dat
##    child age sex time
## 1      1   2   1   12
## 2      2   3   2   17
## 3      3   2   2   10
## 4      4   4   2   24
## 5      5   2   2   13
## 6      6   3   1   19
## 7      7   4   1   22
## 8      8   3   1   20
## 9      9   4   2   31
## 10    10   4   1   32
## 11    11   3   2   23
## 12    12   3   1   16
## 13    13   2   1    8
## 14    14   2   2   14
## 15    15   2   1    9
## 16    16   4   1   39
## 17    17   4   2   37
## 18    18   3   2   11
## 19    19   3   1   14
## 20    20   3   2   22
## 21    21   2   2   11
## 22    22   2   1    7
## 23    23   2   2   11
## 24    24   3   2   16
## 25    25   4   1   13
## 26    26   4   2   49
## 27    27   2   2   15
age.1 = as.factor(age)
sex.1 = as.factor(sex)
data = data.frame(age.1,sex.1,time)

Let us plot our data to see what it looks like

plot(age.1,time, xlab = "AGE", ylab = "TIME")

plot(sex.1,time, xlab = "SEX", ylab = "TIME")

We will do a linear model to see how the data looks and make

some plots to see how our residuals look

lmout = lm(time ~ age + sex, data = dat)
summary(lmout)
## 
## Call:
## lm(formula = time ~ age + sex, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.5946  -2.5868   0.3985   2.4132  17.3872 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -16.469      6.362  -2.589   0.0161 *  
## age           10.011      1.547   6.472 1.08e-06 ***
## sex            4.018      2.531   1.587   0.1255    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.514 on 24 degrees of freedom
## Multiple R-squared:  0.6419, Adjusted R-squared:  0.612 
## F-statistic: 21.51 on 2 and 24 DF,  p-value: 4.454e-06
plot(lmout$fitted.values,lmout$residuals)

rstudRes <- studres(lmout)
qqnorm(rstudRes)

Our residuals seem to fanning out so we will do a boxcox transformation

boxcox(lmout)

It seems that our lambda is very close to zero so we will do a log

transformation, and see if our residuals look better

yprime = log((dat[,"time"]))
MOCK_DATA.1 = cbind(dat, yprime)
lmout.2 = lm(yprime ~ sex + age, data = MOCK_DATA.1)
summary(lmout.2) 
## 
## Call:
## lm(formula = yprime ~ sex + age, data = MOCK_DATA.1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.67753 -0.15059 -0.02919  0.19706  0.42864 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.00007    0.27051   3.697  0.00113 ** 
## sex          0.22070    0.10764   2.050  0.05141 .  
## age          0.50543    0.06578   7.683  6.4e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.277 on 24 degrees of freedom
## Multiple R-squared:  0.7179, Adjusted R-squared:  0.6944 
## F-statistic: 30.54 on 2 and 24 DF,  p-value: 2.54e-07
r.resid.p = rstandard(lmout.2) 
plot(lmout.2$fitted.values, r.resid.p)    
abline(0,0)     

qqnorm(r.resid.p)
abline(0,1)