# Frequency table of the grouping variable (smoker: 0 vs. 1) to check the
# size of each group and the number of missing values before comparing them.
# NOTE(review): freq() is not base R -- presumably summarytools::freq(),
# loaded earlier in the file; confirm.
freq(lab6$smoker)
## Frequencies
## lab6$smoker
## Type: Integer
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
## 0 20 47.62 47.62 47.62 47.62
## 1 22 52.38 100.00 52.38 100.00
## <NA> 0 0.00 100.00
## Total 42 100.00 100.00 100.00 100.00
# Per-group descriptives for birthweight: group size, mean, and SD.
# Missing birthweights are dropped (and not counted in n), so a single NA
# cannot turn a group's mean/sd into NA and n reflects the cases actually
# summarised -- matching the lm() fit below, which also excludes incomplete
# rows (na.action = na.exclude). With complete data the result is unchanged.
by_stats <- tapply(
  lab6$birthweight,
  lab6$smoker,
  function(x) {
    c(
      n = sum(!is.na(x)),
      mean = mean(x, na.rm = TRUE),
      sd = sd(x, na.rm = TRUE)
    )
  }
)
by_stats
## $`0`
## n mean sd
## 20.00000 7.69000 1.14795
##
## $`1`
## n mean sd
## 22.000000 6.877273 1.389392
# Split the data into one data frame per smoking group
# (smoker == 0 -> nonsmoker, smoker == 1 -> smoker).
# %in% never returns NA, so rows with a missing smoker value are left out of
# both subsets.
nonsmoker <- lab6[lab6$smoker %in% 0, ]
smoker <- lab6[lab6$smoker %in% 1, ]
# Independent-samples t-test of birthweight: smokers (x) vs. non-smokers (y),
# so the estimated difference is smoker minus non-smoker.
# var.equal = TRUE requests the pooled-variance (Student) test instead of
# R's default Welch test.
# NOTE(review): the result is stored in `t`, the same name as base::t()
# (transpose). Calls such as t(m) still resolve to the function, but a more
# descriptive name would be easier to read.
t <- t.test(x = smoker$birthweight,
y = nonsmoker$birthweight,
var.equal = TRUE)
t
##
## Two Sample t-test
##
## data: smoker$birthweight and nonsmoker$birthweight
## t = -2.0545, df = 40, p-value = 0.0465
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.61224002 -0.01321452
## sample estimates:
## mean of x mean of y
## 6.877273 7.690000
# Simple linear regression of birthweight on the 0/1 smoker indicator.
# With a single binary predictor the intercept is the non-smoker mean, the
# slope is the smoker-minus-non-smoker difference in means, and the slope's
# t-test reproduces the equal-variance t-test above.
# na.action = na.exclude leaves incomplete rows out of the fit while keeping
# residuals/fitted values aligned with the original rows (padded with NA).
reg <- lm(birthweight ~ smoker, data = lab6, na.action = na.exclude)
summary(reg)
##
## Call:
## lm(formula = birthweight ~ smoker, data = lab6, na.action = na.exclude)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.67727 -0.87727 0.07273 0.91000 3.12273
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.6900 0.2863 26.859 <2e-16 ***
## smoker -0.8127 0.3956 -2.054 0.0465 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.28 on 40 degrees of freedom
## Multiple R-squared: 0.09545, Adjusted R-squared: 0.07284
## F-statistic: 4.221 on 1 and 40 DF, p-value: 0.0465
# ANOVA table for the regression: provides the model (smoker) and residual
# sums of squares that R-squared is computed from below.
anova(reg)
## Analysis of Variance Table
##
## Response: birthweight
## Df Sum Sq Mean Sq F value Pr(>F)
## smoker 1 6.920 6.9198 4.2209 0.0465 *
## Residuals 40 65.577 1.6394
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pull the sums of squares out of the ANOVA table by row and column name
SS.Model <- anova(reg)["smoker", "Sum Sq"]
SS.Model
## [1] 6.919792
SS.Residual <- anova(reg)["Residuals", "Sum Sq"]
SS.Residual
## [1] 65.57664
# R-squared: model SS as a share of total SS (model + residual)
r.squared.1 <- SS.Model / sum(SS.Model, SS.Residual)
r.squared.1
## [1] 0.09545011
# Express as a percentage of variance explained
100 * r.squared.1
## [1] 9.545011
# Cohen's d for the first predictor of a fitted linear model.
#
# Divides the first non-intercept coefficient by the model's residual
# standard deviation. For a simple regression on a 0/1 group indicator the
# residual SD is the pooled within-group SD, so the result is the classic
# two-group Cohen's d.
#
# x: a fitted model (e.g. from lm()) whose second coefficient is the
#    predictor of interest (the first being the intercept).
# Returns a length-one numeric vector named after that coefficient.
d <- function(x) {
  slope <- coef(x)[2]
  # sigma() is the residual SD, i.e. the square root of the residual mean
  # square. Unlike the positional lookup anova(x)[2, 3], it still refers to
  # the residual term when the model contains more than one predictor.
  slope / sigma(x)
}
# Standardized effect size (Cohen's d) for the smoker coefficient of reg
d(reg)
## smoker
## -0.6347464
# 95% confidence intervals (confint()'s default level) for the intercept and
# the smoker slope
confint(reg)
## 2.5 % 97.5 %
## (Intercept) 7.111355 8.26864485
## smoker -1.612240 -0.01321452
# Show each point estimate next to its confidence limits in one table
cbind(est = coef(reg), confint(reg))
## est 2.5 % 97.5 %
## (Intercept) 7.6900000 7.111355 8.26864485
## smoker -0.8127273 -1.612240 -0.01321452