sanderson_steven_hw3.R

Steven — Mar 8, 2014, 1:29 PM

# Sanderson, Steven

# Homework 3

# Question 1 a)
# Paired 2x2 table of counts: rows are the "Low Exposure" response and columns
# the "High Exposure" response. matrix() fills column by column, so the counts
# are listed in the order (Yes,Yes), (No,Yes), (Yes,No), (No,No).
m <- as.table(matrix(
  c(2, 8, 2, 33),
  nrow = 2,
  dimnames = list(
    "Low Exposure" = c("Yes", "No"),
    "High Exposure" = c("Yes", "No")
  )
))
# McNemar's test on the paired table (the continuity correction is the default)
mcnemar.test(x = m)

    McNemar's Chi-squared test with continuity correction

data:  m
McNemar's chi-squared = 2.5, df = 1, p-value = 0.1138

# Question 1 b)
# Conclusion: fail to reject the null hypothesis.
# The critical value of Chi-square with 1 d.f. at alpha = 0.05 is 3.84. Since the observed
# Chi-square = 2.5 < 3.84 (p-value = 0.1138 > 0.05), we fail to reject the null hypothesis.


# Question 2
# Two numeric vectors of length 10; the analysis treats them as paired
# observations (element i of one goes with element i of the other).
ptCholesterol <- c(
  5.12, 6.18, 6.77, 6.65, 6.36,
  5.9, 5.48, 6.02, 10.34, 8.51
)
ptTriglyceride <- c(
  2.3, 2.54, 2.95, 3.77, 4.18,
  5.31, 5.53, 8.83, 9.48, 14.2
)
# Scatterplot with cholesterol on the x-axis and triglyceride on the y-axis
plot(
  x = ptCholesterol,
  y = ptTriglyceride,
  main = "No apparent linear relationship"
)

plot of chunk unnamed-chunk-1


# Least-squares fit with cholesterol as the response and triglyceride as the
# single predictor; summary() prints the coefficient table and fit statistics.
CT <- lm(formula = ptCholesterol ~ ptTriglyceride)
summary(object = CT)

Call:
lm(formula = ptCholesterol ~ ptTriglyceride)

Residuals:
   Min     1Q Median     3Q    Max 
-1.490 -0.668 -0.171  0.450  2.657 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)       5.161      0.763    6.77  0.00014 ***
ptTriglyceride    0.266      0.110    2.42  0.04204 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.26 on 8 degrees of freedom
Multiple R-squared:  0.422, Adjusted R-squared:  0.35 
F-statistic: 5.84 on 1 and 8 DF,  p-value: 0.042
# Analysis-of-variance table for the fitted regression model CT
anova(CT)
Analysis of Variance Table

Response: ptCholesterol
               Df Sum Sq Mean Sq F value Pr(>F)  
ptTriglyceride  1   9.28    9.28    5.84  0.042 *
Residuals       8  12.71    1.59                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1


# Pearson correlation coefficient, first with the built-in function
cor(x = ptCholesterol, y = ptTriglyceride, method = "pearson")
[1] 0.6497

# Same coefficient by hand: r = Lxy / sqrt(Lxx * Lyy), where Lxy is the
# corrected sum of cross-products and Lxx, Lyy are the corrected sums of squares
Lxy <- sum(
  (ptCholesterol - mean(ptCholesterol)) *
    (ptTriglyceride - mean(ptTriglyceride))
)
Lxy
[1] 34.9
Lxx <- sum(
  (ptCholesterol - mean(ptCholesterol))^2
)
Lxx
[1] 22
Lyy <- sum(
  (ptTriglyceride - mean(ptTriglyceride))^2
)
Lyy
[1] 131.2
r <- Lxy / sqrt(Lxx * Lyy)
r
[1] 0.6497
# t statistic for the correlation, on n - 2 degrees of freedom
t <- r * sqrt(length(ptCholesterol) - 2) / sqrt(1 - r^2)
t
[1] 2.417
# 95th percentile of the t distribution with 8 degrees of freedom
qt(p = 0.95, df = 8, ncp = 0)
[1] 1.86
# The critical value is qt(0.95, 8, 0) = 1.86. Since t = 2.42 > 1.86, the
# p-value is below alpha and the null hypothesis is rejected.
t > qt(p = 0.95, df = 8, ncp = 0)
[1] TRUE

# Spearman rank correlation
# rs = 1 - 6 * sum(d^2) / (n * (n^2 - 1)), where d is the difference between
# the RANKS of the paired observations, not between the raw values. This
# shortcut formula assumes no tied values, which holds for these two samples;
# in that case it equals cor(ptCholesterol, ptTriglyceride, method = "spearman").
# n is taken from the data instead of being hard-coded.
rs <- 1 - (6 * sum((rank(ptCholesterol) - rank(ptTriglyceride))^2)) /
  (length(ptCholesterol) * (length(ptCholesterol)^2 - 1))
rs
[1] 0.4182
# t statistic for rs, using the same n - 2 degrees of freedom form as for r
tspear <- (rs * sqrt(length(ptCholesterol) - 2)) / sqrt(1 - rs^2)
tspear
[1] 1.302
# Compare with the same critical value used for the Pearson test
tspear > qt(0.95, 8, 0)
[1] FALSE

# Question 3 the lab
# Load cars.csv (its first row holds the column names) and inspect the structure
cars1 <- read.csv(file = "cars.csv", header = TRUE)
str(object = cars1)
'data.frame':   32 obs. of  11 variables:
 $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
 $ cyl : int  6 6 4 6 8 6 8 4 4 6 ...
 $ disp: num  160 160 108 258 360 ...
 $ hp  : int  110 110 93 110 175 105 245 62 95 123 ...
 $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
 $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
 $ qsec: num  16.5 17 18.6 19.4 17 ...
 $ vs  : int  0 0 1 1 0 1 0 1 1 1 ...
 $ am  : int  1 1 1 0 0 0 0 0 0 0 ...
 $ gear: int  4 4 4 3 3 3 3 4 4 4 ...
 $ carb: int  4 4 1 1 2 1 4 2 2 4 ...
# The str() output shows that cars1 is already a data.frame, so no conversion
# is needed. Preview the first six rows.
head(x = cars1, n = 6L)
   mpg cyl disp  hp drat    wt  qsec vs am gear carb
1 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
2 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
3 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
4 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
5 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
6 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Number of rows and columns in cars1
dim(x = cars1)
[1] 32 11
# Keep only the five columns "mpg", "cyl", "hp", "wt" and "am", stored as cars2
cars2 <- data.frame(
  cars1[c("mpg", "cyl", "hp", "wt", "am")]
)
head(x = cars2, n = 6L)
   mpg cyl  hp    wt am
1 21.0   6 110 2.620  1
2 21.0   6 110 2.875  1
3 22.8   4  93 2.320  1
4 21.4   6 110 3.215  0
5 18.7   8 175 3.440  0
6 18.1   6 105 3.460  0
# Perform a two-sample t-test on mpg between the am == 0 and am == 1 rows.
# First split cars2 into one data.frame per am value.
am0 <- data.frame(cars2[cars2[["am"]] == 0, ])
am1 <- data.frame(cars2[cars2[["am"]] == 1, ])
head(x = am0, n = 6L) # Check data
   mpg cyl  hp    wt am
4 21.4   6 110 3.215  0
5 18.7   8 175 3.440  0
6 18.1   6 105 3.460  0
7 14.3   8 245 3.570  0
8 24.4   4  62 3.190  0
9 22.8   4  95 3.150  0
head(x = am1, n = 6L) # Check data
    mpg cyl  hp    wt am
1  21.0   6 110 2.620  1
2  21.0   6 110 2.875  1
3  22.8   4  93 2.320  1
18 32.4   4  66 2.200  1
19 30.4   4  52 1.615  1
20 33.9   4  65 1.835  1
# t.test() defaults to the Welch (unequal variance) version
t.test(x = am0$mpg, y = am1$mpg)

    Welch Two Sample t-test

data:  am0$mpg and am1$mpg
t = -3.767, df = 18.33, p-value = 0.001374
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -11.28  -3.21
sample estimates:
mean of x mean of y 
    17.15     24.39 
# Construct scatterplots of mpg against cyl, hp and wt, stacked in one column.
# The formula interface with data = cars2 replaces attach()/detach(), so the
# search path is never modified and the column names cannot clash with
# same-named objects in the workspace.
# par() returns the previous settings; they are restored after plotting so the
# 3 x 1 layout does not leak into later plots.
old_par <- par(mfrow = c(3, 1))
plot(mpg ~ cyl, data = cars2)
plot(mpg ~ hp, data = cars2)
plot(mpg ~ wt, data = cars2)

plot of chunk unnamed-chunk-1

par(old_par)

# Question 4 Fit a regression line relating the Cholesterol Level to the Triglyceride level
# CT was fitted as ptCholesterol ~ ptTriglyceride, so its intercept and slope
# describe cholesterol (y) as a function of triglyceride (x). The scatterplot
# must use the same orientation; otherwise abline(CT) draws a line that does
# not correspond to the plotted points.
plot(ptTriglyceride, ptCholesterol)
abline(CT)

plot of chunk unnamed-chunk-1