# Chunk option error = TRUE: the "## Error ..." lines further down show chunks
# reporting their errors in the output instead of stopping the document.
knitr::opts_chunk$set(error=TRUE)

1a Response variable: presence of cancer and cancer recurrence in breast cancer patients with high insulin levels from being overweight

1b Factors: high insulin levels, and obesity

1c Replication was used, but randomization was not: because the study targets a specific disease (breast cancer), the participants are not a completely randomized sample. Replication is present because the study was conducted on a considerable number of women (512) over a span of 10 years and can be repeated over time.

2a

# 2a: side-by-side boxplots of BMD by treatment group, plus the control mean.
# The data must be read before it is used; previously `BMDdata` and `treat`
# were never created, so both calls failed with "object not found".
# The file has columns BMD, treat (control / hidose / lowdose) and g (1-3).
BMDdata <- read.table(
  file.path("~", "Desktop", "CLASSES", "PSTAT122", "BMDdata.txt"),
  header = TRUE
)
boxplot(
  BMD ~ g,
  data = BMDdata,
  xlab = "Treatment",
  ylab = "Bone Mineral Density (BMD)",
  main = "Kudzu Treatments on Bone Mineral Density"
)
# Mean BMD of the control group, selected by the `treat` column.
mean(BMDdata$BMD[BMDdata$treat == "control"])

2b Simple linear regression/One way ANOVA

2c

# 2c: one-way ANOVA of BMD on treatment group.
# Load the data if an earlier chunk has not already done so; without it this
# chunk failed with "object 'BMDdata' not found".
if (!exists("BMDdata")) {
  BMDdata <- read.table(
    file.path("~", "Desktop", "CLASSES", "PSTAT122", "BMDdata.txt"),
    header = TRUE
  )
}
# g is stored as the numbers 1-3, so it has to be converted to a factor;
# otherwise lm() fits a straight line in g (1 df) instead of comparing the
# three group means (2 df). This matches the aov(BMD ~ as.factor(g)) call in 2g.
# NOTE: the written answer in 2d quotes p = 0.00546, which is not the 2-df
# F-test value; the aov output in 2g reports 0.0014 for the group comparison.
lm.BMD <- lm(BMD ~ as.factor(g), data = BMDdata)
anova(lm.BMD)

2d

# 2d: summary of the model stored in `lm.BMD` (assigned in part 2c).
# The call needs that object to exist; the line below is the error printed
# when it did not.
summary(lm.BMD)
## Error in summary(lm.BMD): object 'lm.BMD' not found

Because the p-value 0.00546 is less than α = 0.05, we reject the null hypothesis. There is significant evidence that the groups differ with respect to mean BMD.

2e

# 2e: BMD measurements entered by hand, 15 values per treatment group.
control <- c(
  0.228, 0.207, 0.234, 0.220, 0.217,
  0.228, 0.209, 0.221, 0.204, 0.220,
  0.203, 0.219, 0.218, 0.245, 0.210
)

lowdose <- c(
  0.211, 0.220, 0.221, 0.233, 0.219,
  0.233, 0.226, 0.228, 0.216, 0.225,
  0.200, 0.208, 0.198, 0.208, 0.203
)

hidose <- c(
  0.250, 0.237, 0.217, 0.206, 0.247,
  0.228, 0.245, 0.232, 0.267, 0.261,
  0.221, 0.219, 0.232, 0.209, 0.255
)

# One-sample t-test on the control group, run for its 95% confidence interval
# (mu = 0 and conf.level = 0.95 are the defaults, written out here).
t.test(x = control, mu = 0, conf.level = 0.95)
## 
##  One Sample t-test
## 
## data:  control
## t = 73.155, df = 14, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.2124498 0.2252835
## sample estimates:
## mean of x 
## 0.2188667

We are 95% confident that the mean BMD for the control treatment group is between 0.2124498 and 0.2252835.

# One-sample t-test on the low-dose group, run for its 95% confidence interval
# (mu = 0 and conf.level = 0.95 are the defaults, written out here).
t.test(x = lowdose, mu = 0, conf.level = 0.95)
## 
##  One Sample t-test
## 
## data:  lowdose
## t = 72.984, df = 14, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.2102348 0.2229652
## sample estimates:
## mean of x 
##    0.2166

We are 95% confident that the mean BMD for the low dose treatment group is between 0.2102348 and 0.2229652.

# One-sample t-test on the high-dose group, run for its 95% confidence interval
# (mu = 0 and conf.level = 0.95 are the defaults, written out here).
t.test(x = hidose, mu = 0, conf.level = 0.95)
## 
##  One Sample t-test
## 
## data:  hidose
## t = 48.501, df = 14, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.2246716 0.2454617
## sample estimates:
## mean of x 
## 0.2350667

We are 95% confident that the mean BMD for the high dose treatment group is between 0.2246716 and 0.2454617.

2f

# 2f: compare the two kudzu groups (low and high dose pooled) with control.
# c() concatenates the two dose groups into one sample of 30 observations.
# `lowdose + hidose` would instead add them element by element, giving 15
# pairwise sums whose mean (about 0.45) is roughly double any real BMD value.
avg.c <- mean(control)
avg.t <- mean(c(lowdose, hidose))
treatment.diff <- avg.t - avg.c
treatment.diff
## [1] 0.006966667
# Welch two-sample t-test, run for its 99% confidence interval. The treated
# sample is passed first so the interval is for (treated - control), the same
# sign convention as treatment.diff.
t.test(c(lowdose, hidose), control, conf.level = 0.99)
## (output not shown: the interval printed previously came from the
##  element-wise lowdose + hidose sums and no longer applies; re-knit to
##  regenerate it)
2g

# 2g: read the BMD data file (first row holds the column names) and print a
# per-column summary.
path <- file.path("~", "Desktop", "CLASSES", "PSTAT122", "BMDdata.txt")
bmd <- read.table(file = path, header = TRUE)
summary(bmd)
##       BMD             treat          g    
##  Min.   :0.1980   control:15   Min.   :1  
##  1st Qu.:0.2100   hidose :15   1st Qu.:1  
##  Median :0.2200   lowdose:15   Median :2  
##  Mean   :0.2233                Mean   :2  
##  3rd Qu.:0.2320                3rd Qu.:3  
##  Max.   :0.2670                Max.   :3
# BMD values split by group code; the number of pieces is used later as the
# number of treatment groups.
s <- split(bmd$BMD, as.factor(bmd$g))
# One-way ANOVA of BMD on the group code, with g treated as a factor.
bmdano <- aov(BMD ~ as.factor(g), data = bmd)
summary(bmdano)
##              Df   Sum Sq   Mean Sq F value Pr(>F)   
## as.factor(g)  2 0.003186 0.0015928   7.718 0.0014 **
## Residuals    42 0.008668 0.0002064                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Bonferroni-adjusted 95% confidence interval for the contrast
# (mean of group 1) - (mean of group 2), using the pooled ANOVA error variance.
le <- tapply(bmd$BMD, bmd$g, length)  # observations per group
# Total sample size. It was previously printed but never stored, which made
# the qt() calls fail with "object 'n' not found".
n <- length(bmd$BMD)
n
## [1] 45
v <- length(s)  # number of treatment groups
m <- 3          # number of intervals sharing the 5% error rate (Bonferroni)
# Residual mean square taken from the fitted ANOVA rather than typed in; it is
# the "Residuals / Mean Sq" entry of summary(bmdano) (0.0002064 when rounded).
mse <- deviance(bmdano) / df.residual(bmdano)
ctr <- c(1, -1, 0)  # contrast coefficients for groups 1, 2, 3
est <- sum(ctr * tapply(bmd$BMD, bmd$g, mean))
# Standard error of the contrast: sqrt(MSE * sum(c_i^2 / r_i)). Both limits
# must use this same value; the upper limit previously used a different
# expression, sqrt(mse * 1).
se <- sqrt(mse * sum(ctr^2 / le))
crit <- qt(0.05 / (2 * m), n - v, lower.tail = FALSE)
bd <- est - crit * se
bu <- est + crit * se
print(paste('The CI is (',bd,",",bu,')'))

2h High dose of Kudzu

2i

# 2i: plots of the fitted model `lm.BMD` (assigned in part 2c), used to judge
# the model assumptions discussed below.
# The call needs that object to exist; the line below is the error printed
# when it did not.
plot(lm.BMD)
## Error in plot(lm.BMD): object 'lm.BMD' not found

Assumptions violated: equal variance. Assumptions met: independence, linearity, normality.

2j

Use the MSE, plugging in the SSE: MSE = SSE/(n − v) = 0.008668/42 ≈ 0.0002064.

3a A set of contrasts I’d find particularly interesting would be to compare the reaction times of the two treatments, auditory and visual, given the same elapsed time between cue and stimulus. For example, I’d compare the treatment combinations 1&4, 2&5, and 3&6.

3b

# 3b: read the reaction-time data, fit reaction time (y) on treatment (Trtmt),
# and draw the model's diagnostic plots.
path <- file.path("~", "Desktop", "CLASSES", "PSTAT122", "reaction.time.txt")
rxntime <- read.table(path, header = TRUE)
# NOTE(review): if Trtmt is stored as numeric codes (1-6), lm() treats it as a
# continuous predictor; confirm the column type and use factor(Trtmt) if the
# six treatment combinations should be compared as groups.
rxn.lm <- lm(y ~ Trtmt, data = rxntime)
# `fit` used to be built from `reaction_time`, an object that is never created
# (the data lives in `rxntime`), so the call failed. It is kept as an alias of
# the same model so the name still exists.
fit <- rxn.lm
plot(rxn.lm)