The BodyTemp50 data were collected to see whether the mean body temperature for humans differs from 98.6°F. Based on the sample in the data, test whether there is evidence that the mean body temperature is different from 98.6°F.
\(H_0: \mu = 98.6\)
\(H_A: \mu \neq 98.6\)
## If the Lock5Data package is not installed yet, remove the "##" from the next line and run it once
##install.packages("Lock5Data")
library("Lock5Data")
## Warning: package 'Lock5Data' was built under R version 3.5.2
data(BodyTemp50)
head(BodyTemp50)
## BodyTemp Pulse Gender
## 1 97.6 69 0
## 2 99.4 77 1
## 3 99.0 75 0
## 4 98.8 84 1
## 5 98.0 71 0
## 6 98.9 76 1
# Sample size: number of rows in the data set.
nrow(BodyTemp50)
## [1] 50
# Sample mean of BodyTemp, computed two equivalent ways
# (direct column access, then with()).
mean(BodyTemp50$BodyTemp)
## [1] 98.26
with(BodyTemp50, mean(BodyTemp))
## [1] 98.26
# Sample standard deviation of BodyTemp.
sd(BodyTemp50$BodyTemp)
## [1] 0.7653197
# Two-sided one-sample t test of H0: mean body temperature = 98.6.
t.test(
  BodyTemp50$BodyTemp,
  mu = 98.6,
  alternative = "two.sided"
)
##
## One Sample t-test
##
## data: BodyTemp50$BodyTemp
## t = -3.1414, df = 49, p-value = 0.002851
## alternative hypothesis: true mean is not equal to 98.6
## 95 percent confidence interval:
## 98.0425 98.4775
## sample estimates:
## mean of x
## 98.26
# Lower-tailed one-sample t test (alternative: mean is less than 98.6).
t.test(
  BodyTemp50$BodyTemp,
  mu = 98.6,
  alternative = "less"
)
##
## One Sample t-test
##
## data: BodyTemp50$BodyTemp
## t = -3.1414, df = 49, p-value = 0.001425
## alternative hypothesis: true mean is less than 98.6
## 95 percent confidence interval:
## -Inf 98.44146
## sample estimates:
## mean of x
## 98.26
# Upper-tailed one-sample t test (alternative: mean is greater than 98.6).
t.test(
  BodyTemp50$BodyTemp,
  mu = 98.6,
  alternative = "greater"
)
##
## One Sample t-test
##
## data: BodyTemp50$BodyTemp
## t = -3.1414, df = 49, p-value = 0.9986
## alternative hypothesis: true mean is greater than 98.6
## 95 percent confidence interval:
## 98.07854 Inf
## sample estimates:
## mean of x
## 98.26
Course Example
# Toy data: four (x, y) pairs for a simple linear regression.
x <- c(4.2, 3.1, 5.7, 2.9)
y <- c(7.1, 4.2, 10, 4.1)

# Least-squares fit of y on x, followed by the coefficient table,
# residual standard error, R-squared and overall F test.
fit <- lm(y ~ x)
summary(fit)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## 1 2 3 4
## 0.26043 -0.24613 -0.10334 0.08903
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.2990 0.5031 -4.569 0.04471 *
## x 2.1758 0.1219 17.851 0.00312 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2711 on 2 degrees of freedom
## Multiple R-squared: 0.9938, Adjusted R-squared: 0.9906
## F-statistic: 318.7 on 1 and 2 DF, p-value: 0.003124
# Diagnostic plots produced by the plot method for the fitted lm object.
plot(fit)
# Scatterplot of the raw data with the fitted regression line drawn in blue.
plot(x, y)
abline(fit, col = "blue")
Let’s do prediction
# Grid of new x values at which to predict.
# NOTE(review): the x values used to fit the model above lie between 2.9 and
# 5.7, so every point on this grid is an extrapolation -- confirm this is the
# intended illustration.
new <- data.frame(x = seq(-3, 3, 0.5))
# With se.fit = TRUE predict() returns a list; its $fit element is a matrix
# with columns fit, lwr and upr (point prediction and interval bounds).
prediction <- predict(fit, new, se.fit = TRUE, interval = "prediction")
# Combine the grid with the predictions: columns x, fit, lwr, upr.
Data <- data.frame(new, prediction$fit)
# Set the y-axis range from the interval bounds; by default plot() would use
# only the range of Data$fit and the interval lines could be clipped.
plot(Data$x, Data$fit, ylim = range(Data$lwr, Data$upr))
abline(fit)
## add prediction interval
lines(Data$x, Data$lwr, col = "red")
lines(Data$x, Data$upr, col = "red")
## If the alr4 package is not installed yet, remove the "##" from the next line and run it once
##install.packages("alr4")
library(alr4)
## Warning: package 'alr4' was built under R version 3.5.3
## Loading required package: car
## Loading required package: carData
## Loading required package: effects
## Warning: package 'effects' was built under R version 3.5.3
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
head(fuel2001)
## Drivers FuelC Income Miles MPC Pop Tax
## AL 3559897 2382507 23471 94440 12737.00 3451586 18.0
## AK 472211 235400 30064 13628 7639.16 457728 8.0
## AZ 3550367 2428430 25578 55245 9411.55 3907526 18.0
## AR 1961883 1358174 22257 98132 11268.40 2072622 21.7
## CA 21623793 14691753 32275 168771 8923.89 25599275 18.0
## CO 3287922 2048664 32949 85854 9722.73 3322455 22.0
# Scatterplot matrix of selected variables (Miles on the log scale).
pairs(~ Tax + Income + log(Miles) + FuelC, data = fuel2001)
# Scatterplot matrix of every column in fuel2001.
pairs(~ ., data = fuel2001)

# Full model: regress FuelC on all other columns of fuel2001.
# This reassigns `fit`, replacing any earlier object of that name.
fit <- lm(FuelC ~ ., data = fuel2001)
summary(fit)
##
## Call:
## lm(formula = FuelC ~ ., data = fuel2001)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1480910 -158802 19267 174208 1090089
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.902e+05 8.199e+05 -0.598 0.552983
## Drivers 6.368e-01 1.452e-01 4.386 7.09e-05 ***
## Income 7.690e+00 1.632e+01 0.471 0.639793
## Miles 5.850e+00 1.621e+00 3.608 0.000784 ***
## MPC 4.562e+01 3.565e+01 1.280 0.207337
## Pop -1.945e-02 1.245e-01 -0.156 0.876586
## Tax -2.087e+04 1.324e+04 -1.576 0.122235
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 398400 on 44 degrees of freedom
## Multiple R-squared: 0.9808, Adjusted R-squared: 0.9782
## F-statistic: 374.6 on 6 and 44 DF, p-value: < 2.2e-16
# Reduced model: FuelC regressed on Drivers and Miles only.
fit1 <- lm(FuelC ~ Drivers + Miles, data = fuel2001)
summary(fit1)
##
## Call:
## lm(formula = FuelC ~ Drivers + Miles, data = fuel2001)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1667584 -207441 63143 156912 1055581
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.207e+05 1.016e+05 -2.173 0.034783 *
## Drivers 6.121e-01 1.938e-02 31.578 < 2e-16 ***
## Miles 6.041e+00 1.460e+00 4.137 0.000141 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 406800 on 48 degrees of freedom
## Multiple R-squared: 0.9782, Adjusted R-squared: 0.9773
## F-statistic: 1075 on 2 and 48 DF, p-value: < 2.2e-16
plot(fit1)
9.2.2
library(readxl)
# Load the endothelial cell adherence data from an Excel workbook.
# NOTE(review): this is an absolute path on one specific machine; the script
# will not run elsewhere unless the file exists at the same location.
cell <- read_excel(
  path = "C:/Users/zitao_000/Dropbox/homework/teaching/data/ds9.2.2-endothelial-cell-adherence.xls"
)
# Show the first rows to check the column names.
head(cell)
## # A tibble: 6 x 3
## Sample `Type A` `Type B`
## <dbl> <dbl> <dbl>
## 1 1 127 129
## 2 2 133 133
## 3 3 127 127
## 4 4 116 122
## 5 5 132 131
## 6 6 126 125
## Paired t test
# Side-by-side boxplots of the two measurement columns.
boxplot(cell$`Type A`, cell$`Type B`)
# Row-by-row differences (Type A minus Type B) and their sample mean.
z <- cell$`Type A` - cell$`Type B`
z.bar <- mean(z)
# NOTE: despite its name, this variable holds the sample standard deviation
# of the differences (sd(z)); it is neither pooled nor a variance.
pool.sample.variance <- sd(z)
pool.sample.variance
## [1] 6.084117
\(H_0: \mu_z = \mu_A - \mu_B = 0\)
\(H_A: \mu_z = \mu_A - \mu_B \neq 0\)
# Number of paired differences, taken from the data instead of being
# hard-coded so the computation stays correct if the data change.
n <- length(z)
# One-sample t statistic for H0: mean difference = 0.
# pool.sample.variance holds sd(z), the standard deviation of the differences.
t <- (sqrt(n) * z.bar) / pool.sample.variance
t
## [1] -0.8346262
## Two-sided p-value
# Use -abs(t) so the doubled tail area is correct whatever the sign of t;
# 2 * pt(t, n - 1) would exceed 1 for a positive t.
2 * pt(-abs(t), n - 1)
## [1] 0.4190021
## Using confidence interval to check if 0 is in CI
# Lower 2.5% quantile of the t distribution with n - 1 degrees of freedom.
t.score <- qt(0.025, n - 1)
t.score
## [1] -2.160369
# t.score is negative, so moe is negative as well: adding it to z.bar gives
# the lower endpoint and subtracting it gives the upper endpoint.
moe <- t.score * pool.sample.variance / sqrt(n)
left.pt <- z.bar + moe
right.pt <- z.bar - moe
ci <- c(left.pt, right.pt)
ci
## [1] -4.870008 2.155722
## [1] -4.870008 2.155722
###quick way
## First way: paired t test on the two columns
t.test(
  cell$`Type A`,
  cell$`Type B`,
  paired = TRUE
)
##
## Paired t-test
##
## data: cell$`Type A` and cell$`Type B`
## t = -0.83463, df = 13, p-value = 0.419
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -4.870008 2.155722
## sample estimates:
## mean of the differences
## -1.357143
## Second way: one-sample t test on the row-by-row differences
z <- cell$`Type A` - cell$`Type B`
# With no mu argument, t.test() tests against a mean of 0.
t.test(z)
##
## One Sample t-test
##
## data: z
## t = -0.83463, df = 13, p-value = 0.419
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -4.870008 2.155722
## sample estimates:
## mean of x
## -1.357143
## Note: if you use the first way but leave out paired = TRUE, t.test() runs a
## Welch two-sample test instead of a paired test, and the result differs
t.test(
  cell$`Type A`,
  cell$`Type B`
)
##
## Welch Two Sample t-test
##
## data: cell$`Type A` and cell$`Type B`
## t = -0.50165, df = 24.134, p-value = 0.6205
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.939125 4.224840
## sample estimates:
## mean of x mean of y
## 128.3571 129.7143