Load the dataset and packages

Means and SDs of birthweight for smokers and non-smokers

# Frequency table of the smoker code (0 = non-smoker, 1 = smoker), including
# a row for missing values.
# NOTE(review): freq() is not base R; presumably summarytools::freq() - confirm
# against the packages loaded at the top of the file.
freq(lab6$smoker)
## Frequencies  
## lab6$smoker  
## Type: Integer  
## 
##               Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
##           0     20     47.62          47.62     47.62          47.62
##           1     22     52.38         100.00     52.38         100.00
##        <NA>      0                               0.00         100.00
##       Total     42    100.00         100.00    100.00         100.00
# Per-group descriptives of birthweight: n, mean and SD for each smoker code.
# Missing birthweights are dropped within each group so that n, mean and sd
# all describe the same observations. t.test() and lm(na.action = na.exclude)
# also discard incomplete cases, so the descriptives stay consistent with the
# tests; without this a single NA would turn the group's mean and sd into NA.

by_stats <- tapply(lab6$birthweight, lab6$smoker, function(x) {
  x <- x[!is.na(x)]
  c(n = length(x), mean = mean(x), sd = sd(x))
})
by_stats
## $`0`
##        n     mean       sd 
## 20.00000  7.69000  1.14795 
## 
## $`1`
##         n      mean        sd 
## 22.000000  6.877273  1.389392

Run t-test

# Split the data by smoking status; rows whose smoker code is missing end up
# in neither group.
nonsmoker <- subset(lab6, smoker == 0)
smoker    <- subset(lab6, smoker == 1)

# Student's two-sample t-test with pooled variance (smokers as x,
# non-smokers as y, so the estimated difference is smoker minus non-smoker).
# NOTE(review): the result name `t` is shared with base::t(); kept as-is so
# any later reference to `t` still works.
t <- t.test(smoker$birthweight, nonsmoker$birthweight, var.equal = TRUE)
t
## 
##  Two Sample t-test
## 
## data:  smoker$birthweight and nonsmoker$birthweight
## t = -2.0545, df = 40, p-value = 0.0465
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.61224002 -0.01321452
## sample estimates:
## mean of x mean of y 
##  6.877273  7.690000

Regression

# Simple linear regression of birthweight on the 0/1 smoker code. With a
# binary predictor the intercept is the non-smoker mean and the slope is the
# smoker minus non-smoker difference, matching the t-test above.
# na.exclude leaves incomplete rows out of the fit.
reg <- lm(birthweight ~ smoker, data = lab6, na.action = na.exclude)
# Coefficient table, residual standard error, R-squared and overall F-test
summary(reg)
## 
## Call:
## lm(formula = birthweight ~ smoker, data = lab6, na.action = na.exclude)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.67727 -0.87727  0.07273  0.91000  3.12273 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   7.6900     0.2863  26.859   <2e-16 ***
## smoker       -0.8127     0.3956  -2.054   0.0465 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.28 on 40 degrees of freedom
## Multiple R-squared:  0.09545,    Adjusted R-squared:  0.07284 
## F-statistic: 4.221 on 1 and 40 DF,  p-value: 0.0465

# ANOVA table for the regression; its sums of squares are used to compute
# R-squared by hand

anova(reg)  
## Analysis of Variance Table
## 
## Response: birthweight
##           Df Sum Sq Mean Sq F value Pr(>F)  
## smoker     1  6.920  6.9198  4.2209 0.0465 *
## Residuals 40 65.577  1.6394                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pull the sums of squares out of the ANOVA table by row and column name
(SS.Model <- anova(reg)["smoker", "Sum Sq"])
## [1] 6.919792
(SS.Residual <- anova(reg)["Residuals", "Sum Sq"])
## [1] 65.57664
# R-squared: model sum of squares as a share of the total sum of squares
(r.squared.1 <- SS.Model / (SS.Model + SS.Residual))
## [1] 0.09545011
# Express as a percentage of variance explained
r.squared.1 * 100
## [1] 9.545011

Cohen's d

# Cohen's d for the slope of a fitted linear model.
#
# x: a fitted model (e.g. from lm()) whose second coefficient is the
#    group-difference slope; for a 0/1 predictor this slope equals the
#    difference between the two group means.
# Returns the slope divided by the residual standard deviation, as a
# length-one numeric vector named after the coefficient.
d <- function(x) {
  b1 <- coef(x)[2]
  # sigma() is the residual SD (square root of the residual mean square),
  # i.e. the pooled SD in the two-group case. Asking the model for it avoids
  # assuming the residuals sit in row 2 of the anova() table, which is only
  # true when the model has a single term.
  b1 / sigma(x)
}
# Effect size for the smoker slope; it is negative because the slope is the
# smoker (1) minus non-smoker (0) difference in mean birthweight
d(reg)
##     smoker 
## -0.6347464
# 95% confidence intervals for the intercept and the smoker slope
confint(reg)
##                 2.5 %      97.5 %
## (Intercept)  7.111355  8.26864485
## smoker      -1.612240 -0.01321452
# Point estimates side by side with their confidence limits
cbind(est = coef(reg), confint(reg))
##                    est     2.5 %      97.5 %
## (Intercept)  7.6900000  7.111355  8.26864485
## smoker      -0.8127273 -1.612240 -0.01321452