Steven — Mar 8, 2014, 1:29 PM
# Sanderson, Steven
# Homework 3
# Question 1 a)
# Build the 2x2 table in one step; the dimension names label the rows as
# "Low Exposure" and the columns as "High Exposure", each with levels Yes/No
m <- as.table(matrix(
  c(2, 2, 8, 33),
  nrow = 2,
  byrow = TRUE,
  dimnames = list(
    "Low Exposure" = c("Yes", "No"),
    "High Exposure" = c("Yes", "No")
  )
))
# McNemar's test on the table (continuity correction is applied by default)
mcnemar.test(m)
McNemar's Chi-squared test with continuity correction
data: m
McNemar's chi-squared = 2.5, df = 1, p-value = 0.1138
# Question 1 b)
# The conclusion is to fail to reject the null hypothesis.
# The critical value of Chi-square with 1 d.f. at alpha = 0.05 is 3.84; since Chi-square = 2.5 < 3.84
# (equivalently, p-value = 0.1138 > 0.05), we fail to reject the null hypothesis
# Question 2
# Cholesterol and triglyceride readings (10 values each, same order)
ptCholesterol <- c(
  5.12, 6.18, 6.77, 6.65, 6.36,
  5.9, 5.48, 6.02, 10.34, 8.51
)
ptTriglyceride <- c(
  2.3, 2.54, 2.95, 3.77, 4.18,
  5.31, 5.53, 8.83, 9.48, 14.2
)
# Scatterplot with cholesterol on the x-axis and triglyceride on the y-axis
plot(
  x = ptCholesterol,
  y = ptTriglyceride,
  main = "No apparent linear relationship"
)
# Simple linear regression with cholesterol as the response variable
CT <- lm(ptCholesterol ~ ptTriglyceride)
summary(CT)
Call:
lm(formula = ptCholesterol ~ ptTriglyceride)
Residuals:
Min 1Q Median 3Q Max
-1.490 -0.668 -0.171 0.450 2.657
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.161 0.763 6.77 0.00014 ***
ptTriglyceride 0.266 0.110 2.42 0.04204 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.26 on 8 degrees of freedom
Multiple R-squared: 0.422, Adjusted R-squared: 0.35
F-statistic: 5.84 on 1 and 8 DF, p-value: 0.042
# Analysis-of-variance table for the fitted regression model CT
anova(CT)
Analysis of Variance Table
Response: ptCholesterol
Df Sum Sq Mean Sq F value Pr(>F)
ptTriglyceride 1 9.28 9.28 5.84 0.042 *
Residuals 8 12.71 1.59
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pearson Correlation Coefficient (built-in computation)
cor(x = ptCholesterol, y = ptTriglyceride, method = "pearson")
[1] 0.6497
# Pearson correlation by hand: r = Lxy / sqrt(Lxx * Lyy), where Lxx and Lyy are
# the corrected sums of squares and Lxy is the corrected sum of cross-products
Lxy <- sum(
  (ptCholesterol - mean(ptCholesterol)) *
    (ptTriglyceride - mean(ptTriglyceride))
)
Lxy
[1] 34.9
Lxx <- sum((ptCholesterol - mean(ptCholesterol))^2)
Lxx
[1] 22
Lyy <- sum((ptTriglyceride - mean(ptTriglyceride))^2)
Lyy
[1] 131.2
r <- Lxy / sqrt(Lxx * Lyy)
r
[1] 0.6497
# Test statistic for H0: rho = 0, t = r * sqrt(n - 2) / sqrt(1 - r^2), on n - 2 d.f.
t <- (r * sqrt(length(ptCholesterol) - 2)) / sqrt(1 - r^2)
t
[1] 2.417
# Upper 5% critical value of the t distribution; d.f. derived from the data (n - 2 = 8)
qt(0.95, df = length(ptCholesterol) - 2)
[1] 1.86
# The critical value is 1.86; since t = 2.42 > 1.86, the p-value < alpha and we reject the null hypothesis
t > qt(0.95, df = length(ptCholesterol) - 2)
[1] TRUE
# Spearman
# rs = 1 - 6 * sum(d^2) / (n * (n^2 - 1)), where d is the difference between the
# RANKS of each pair (not the raw values). The shortcut formula is exact here
# because neither variable contains ties.
rs <- 1 - (6 * sum((rank(ptCholesterol) - rank(ptTriglyceride))^2)) /
  (length(ptCholesterol) * (length(ptCholesterol)^2 - 1))
rs
[1] 0.4182
# Same t approximation as for Pearson's r, on n - 2 degrees of freedom
tspear <- (rs * sqrt(length(ptCholesterol) - 2)) / sqrt(1 - rs^2)
tspear
[1] 1.302
# Compare against the upper 5% critical value (1.86 with 8 d.f.)
tspear > qt(0.95, df = length(ptCholesterol) - 2)
[1] FALSE
# Question 3 the lab
# Load cars.csv (its first row supplies the column names) and show its structure
cars1 <- read.csv(file = "cars.csv", header = TRUE)
str(object = cars1)
'data.frame': 32 obs. of 11 variables:
$ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
$ cyl : int 6 6 4 6 8 6 8 4 4 6 ...
$ disp: num 160 160 108 258 360 ...
$ hp : int 110 110 93 110 175 105 245 62 95 123 ...
$ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
$ wt : num 2.62 2.88 2.32 3.21 3.44 ...
$ qsec: num 16.5 17 18.6 19.4 17 ...
$ vs : int 0 0 1 1 0 1 0 1 1 1 ...
$ am : int 1 1 1 0 0 0 0 0 0 0 ...
$ gear: int 4 4 4 3 3 3 3 4 4 4 ...
$ carb: int 4 4 1 1 2 1 4 2 2 4 ...
# From the str() output above we can see that cars1 is already a data.frame, so no conversion is needed
# Check the head (first six rows) of the data.frame
head(cars1)
mpg cyl disp hp drat wt qsec vs am gear carb
1 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
2 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
3 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
4 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
5 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
6 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Check the dimensions (number of rows, number of columns) of the cars1 data.frame
dim(cars1)
[1] 32 11
# Keep only the five columns of interest ("mpg", "cyl", "hp", "wt", "am") as cars2;
# selecting several columns of a data.frame already yields a data.frame
cars2 <- cars1[, c("mpg", "cyl", "hp", "wt", "am"), drop = FALSE]
head(cars2)
mpg cyl hp wt am
1 21.0 6 110 2.620 1
2 21.0 6 110 2.875 1
3 22.8 4 93 2.320 1
4 21.4 6 110 3.215 0
5 18.7 8 175 3.440 0
6 18.1 6 105 3.460 0
# Perform a t.test on mpg (between am=0 and am=1)
# First split the rows of cars2 into the two am groups
am0 <- cars2[cars2$am == 0, ]
am1 <- cars2[cars2$am == 1, ]
head(am0) # Check data
mpg cyl hp wt am
4 21.4 6 110 3.215 0
5 18.7 8 175 3.440 0
6 18.1 6 105 3.460 0
7 14.3 8 245 3.570 0
8 24.4 4 62 3.190 0
9 22.8 4 95 3.150 0
# Preview the first rows of the am == 1 subset
head(am1) # Check data
mpg cyl hp wt am
1 21.0 6 110 2.620 1
2 21.0 6 110 2.875 1
3 22.8 4 93 2.320 1
18 32.4 4 66 2.200 1
19 30.4 4 52 1.615 1
20 33.9 4 65 1.835 1
# Two-sample t-test comparing mean mpg between the am == 0 and am == 1 groups
# (with default arguments t.test runs the Welch unequal-variance version)
t.test(am0$mpg, am1$mpg)
Welch Two Sample t-test
data: am0$mpg and am1$mpg
t = -3.767, df = 18.33, p-value = 0.001374
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-11.28 -3.21
sample estimates:
mean of x mean of y
17.15 24.39
# Construct scatterplots of mpg vs cyl, mpg vs hp and mpg vs wt, stacked in one column
# The formula interface with data = cars2 replaces attach()/detach(), so no
# columns are placed on the search path
old_par <- par(mfrow = c(3, 1)) # par() returns the previous setting
plot(mpg ~ cyl, data = cars2)
plot(mpg ~ hp, data = cars2)
plot(mpg ~ wt, data = cars2)
par(old_par) # restore the previous layout so later plots are not squeezed into 3 rows
# Question 4 Fit a regression line relating the Cholesterol Level to the Triglyceride level
# CT models ptCholesterol as a function of ptTriglyceride, so triglyceride must be on
# the x-axis and cholesterol on the y-axis for abline(CT) to draw the fitted line
plot(ptTriglyceride, ptCholesterol)
abline(CT)