LabA

# Chunk option for the whole report: keep knitting when a chunk raises an
# error, so the error message is shown in the rendered document.
knitr::opts_chunk$set(error = TRUE)

1a Response variable: presence of cancer and cancer recurrence in breast cancer patients with high insulin levels from being overweight

1b Factors: high insulin levels, and obesity

1c Randomization was not used: the study concerns a specific disease (breast cancer), so the subjects are not a completely randomized sample. Replication, however, was used, because the study was conducted on a considerable number of women (512) over a span of 10 years and can be repeated over time.

# Load the bone mineral density data before it is used. The file provides the
# columns BMD (response), treat (group label) and g (numeric group code).
BMDdata <- read.table(
  file.path("~", "Desktop", "CLASSES", "PSTAT122", "BMDdata.txt"),
  header = TRUE
)

# Side-by-side boxplots of BMD, one box per treatment code.
boxplot(
  BMD ~ g,
  data = BMDdata,
  xlab = "Treatment",
  ylab = "Bone Mineral Density (BMD)",
  main = "Kudzu Treatments on Bone Mineral Density"
)

# Mean BMD of the control group, selected through the `treat` label column.
mean(BMDdata$BMD[BMDdata$treat == "control"])

2b Simple linear regression/One way ANOVA

# One-way ANOVA model for BMD by treatment group.
# Make sure the data frame exists so this chunk does not depend on chunk order.
if (!exists("BMDdata")) {
  BMDdata <- read.table(
    file.path("~", "Desktop", "CLASSES", "PSTAT122", "BMDdata.txt"),
    header = TRUE
  )
}

# `g` is stored as the numeric codes 1-3. It has to enter the model as a
# factor; a numeric `g` would fit a single slope across the codes (1 df)
# instead of comparing the three group means (2 df).
lm.BMD <- lm(BMD ~ factor(g), data = BMDdata)

# Overall F-test for a difference among the group means.
anova(lm.BMD)

# Coefficient estimates and their t-tests.
summary(lm.BMD)

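Because the p-value (0.00546) is less than α = 0.05, we reject the null hypothesis. There is significant evidence that the groups differ with respect to mean BMD. (Note: 0.00546 appears to come from a fit that treats g as a numeric predictor; the one-way ANOVA F-test with g as a factor, reported in the ANOVA table further below, gives p = 0.0014 and leads to the same conclusion.)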

# BMD measurements entered by hand, 15 observations per treatment group.

# Control group.
control <- c(
  0.228, 0.207, 0.234, 0.220, 0.217,
  0.228, 0.209, 0.221, 0.204, 0.220,
  0.203, 0.219, 0.218, 0.245, 0.210
)

# Low-dose group.
lowdose <- c(
  0.211, 0.220, 0.221, 0.233, 0.219,
  0.233, 0.226, 0.228, 0.216, 0.225,
  0.200, 0.208, 0.198, 0.208, 0.203
)

# High-dose group.
hidose <- c(
  0.250, 0.237, 0.217, 0.206, 0.247,
  0.228, 0.245, 0.232, 0.267, 0.261,
  0.221, 0.219, 0.232, 0.209, 0.255
)

# One-sample t procedure on the control group; run for its 95% confidence
# interval for the mean (the test against mu = 0 is a by-product).
t.test(x = control, mu = 0, conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  control
## t = 73.155, df = 14, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.2124498 0.2252835
## sample estimates:
## mean of x 
## 0.2188667

We are 95% confident that the mean BMD for the control treatment group is between 0.2124498 and 0.2252835

# One-sample t procedure on the low-dose group; run for its 95% confidence
# interval for the mean (the test against mu = 0 is a by-product).
t.test(x = lowdose, mu = 0, conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  lowdose
## t = 72.984, df = 14, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.2102348 0.2229652
## sample estimates:
## mean of x 
##    0.2166

We are 95% confident that the mean BMD for the low dose treatment group is between 0.2102348 and 0.2229652.

# One-sample t procedure on the high-dose group; run for its 95% confidence
# interval for the mean (the test against mu = 0 is a by-product).
t.test(x = hidose, mu = 0, conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  hidose
## t = 48.501, df = 14, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.2246716 0.2454617
## sample estimates:
## mean of x 
## 0.2350667

We are 95% confident that the mean BMD for the high dose treatment group is between 0.2246716 and 0.2454617.

# Difference between the average BMD of all treated observations (low and
# high dose pooled) and the average BMD of the control group.
avg.c <- mean(control)
# c() pools the 30 treated observations into one sample. `lowdose + hidose`
# would add the two vectors element-wise and average the 15 pairwise sums,
# which roughly doubles the treatment average.
avg.t <- mean(c(lowdose, hidose))
treatment.diff <- avg.t - avg.c
treatment.diff

## [1] 0.006966667

# Welch two-sample t-test, 99% confidence interval: control versus all treated
# observations. c() pools the two dose groups into one sample of 30;
# `lowdose + hidose` would add them element-wise and compare the control
# group against pairwise sums.
# NOTE(review): the printed output that follows was produced by the earlier
# `lowdose + hidose` call (its "mean of y" is 0.4516667) and must be
# regenerated by re-knitting.
t.test(control, c(lowdose, hidose), conf.level = 0.99)

## 
##  Welch Two Sample t-test
## 
## data:  control and lowdose + hidose
## t = -36.594, df = 21.358, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 99 percent confidence interval:
##  -0.2507827 -0.2148173
## sample estimates:
## mean of x mean of y 
## 0.2188667 0.4516667

# Read the BMD data set (columns BMD, treat, g) and show a per-column summary.
path <- file.path("~", "Desktop", "CLASSES", "PSTAT122", "BMDdata.txt")
# header = TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned. stringsAsFactors = TRUE keeps `treat` a factor on R >= 4.0, so
# summary() tabulates the three groups instead of reporting a character column.
bmd <- read.table(path, header = TRUE, stringsAsFactors = TRUE)
summary(bmd)

##       BMD             treat          g    
##  Min.   :0.1980   control:15   Min.   :1  
##  1st Qu.:0.2100   hidose :15   1st Qu.:1  
##  Median :0.2200   lowdose:15   Median :2  
##  Mean   :0.2233                Mean   :2  
##  3rd Qu.:0.2320                3rd Qu.:3  
##  Max.   :0.2670                Max.   :3

# One-way ANOVA of BMD on the treatment code, with the code treated as a
# categorical factor; print the ANOVA table.
bmdano <- aov(BMD ~ as.factor(g), data = bmd)
summary(bmdano)

# BMD values grouped by treatment code; the number of groups is read from
# this list further below.
s <- split(bmd$BMD, as.factor(bmd$g))

##              Df   Sum Sq   Mean Sq F value Pr(>F)   
## as.factor(g)  2 0.003186 0.0015928   7.718 0.0014 **
## Residuals    42 0.008668 0.0002064                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Bonferroni confidence interval for the contrast
#   mean(g = 1) - mean(g = 2)
# with m = 3 simultaneous comparisons at an overall level of 0.05.
le <- tapply(bmd$BMD, bmd$g, length)      # replicates per group
grp_mean <- tapply(bmd$BMD, bmd$g, mean)  # sample mean per group

# Total number of observations; stored because the degrees of freedom below
# need it.
n <- length(bmd$BMD)
n

## [1] 45

v <- length(le)  # number of treatment groups
m <- 3           # number of simultaneous comparisons

# MSE = SSE / (n - v), computed from the data rather than typed in from the
# rounded ANOVA table.
mse <- sum(tapply(bmd$BMD, bmd$g, function(y) sum((y - mean(y))^2))) / (n - v)

contrast <- c(1, -1, 0)
estimate <- sum(contrast * grp_mean)

# Both bounds share one half-width: t critical value times the standard error
# sqrt(MSE * sum(c_i^2 / r_i)).
half_width <- qt(0.05 / (2 * m), n - v, lower.tail = FALSE) *
  sqrt(mse * sum(contrast^2 / le))

bd <- estimate - half_width
bu <- estimate + half_width

print(paste('The CI is (',bd,",",bu,')'))

2h High dose of Kudzu

# Residual diagnostics for the one-way model. The model is fitted here from
# `bmd` (the data frame read with read.table earlier in the report) so the
# plots do not depend on objects created in other chunks.
lm.BMD <- lm(BMD ~ as.factor(g), data = bmd)
plot(lm.BMD)

Assumptions violated: equal variance. Assumptions met: independence, linearity, normality.

Note: use the MSE (plug in the SSE to compute it).

3a A set of contrasts I’d find particularly interesting would compare the reaction times of the two treatments, auditory and visual, given the same elapsed time between cue and stimulus. For example, I’d compare the treatment combinations 1&4, 2&5, and 3&6.

# Read the reaction-time data and fit the treatment model.
path <- file.path("~", "Desktop", "CLASSES", "PSTAT122", "reaction.time.txt")
rxntime <- read.table(path, header = TRUE)

# The data frame is called `rxntime`; no object named `reaction_time` exists.
# Trtmt holds numeric treatment codes, so it is converted to a factor to
# estimate one mean per treatment rather than a slope in the code.
fit <- lm(y ~ factor(Trtmt), data = rxntime)

# Regress reaction time on the treatment code, draw the standard diagnostic
# plots, then print the coefficient table.
# NOTE(review): Trtmt enters as a numeric code here (the summary shows a
# single slope on 16 residual df), so this is a straight-line fit in the code
# and not a comparison of treatment means. Confirm that this is intended.
rxn.lm <- lm(formula = y ~ Trtmt, data = rxntime)
plot(rxn.lm)

summary(rxn.lm)

## 
## Call:
## lm(formula = y ~ Trtmt, data = rxntime)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.031127 -0.018984 -0.003079  0.018706  0.040968 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.158222   0.012109  13.067 5.94e-10 ***
## Trtmt       0.019952   0.003109   6.417 8.52e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02253 on 16 degrees of freedom
## Multiple R-squared:  0.7202, Adjusted R-squared:  0.7027 
## F-statistic: 41.18 on 1 and 16 DF,  p-value: 8.522e-06

$y_{ij} = \mu + \tau_i + \varepsilon_{ij}$, SLR model. We use the t statistic. Since the p-value is small, we reject the null hypothesis and conclude that there is significant evidence that the treatments have different effects on the reaction time.

# Estimate the parameters of the treatment-effects model from the raw
# reaction times (three observations per treatment).
treatment1 <- c(0.204, 0.170, 0.181)
treatment2 <- c(0.167, 0.182, 0.187)
treatment3 <- c(0.202, 0.198, 0.236)
treatment4 <- c(0.257, 0.279, 0.269)
treatment5 <- c(0.283, 0.235, 0.260)
treatment6 <- c(0.256, 0.281, 0.258)

treatments <- list(
  treatment1, treatment2, treatment3,
  treatment4, treatment5, treatment6
)

# All 18 responses in one vector.
y <- unlist(treatments)

# Grand mean: the estimate of mu.
mu_hat <- mean(y)
mu_hat

# Treatment effects: each treatment mean minus the grand mean.
trt_mean <- vapply(treatments, mean, numeric(1))
tau_hat <- trt_mean - mu_hat
tau_hat

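$\hat{\mu} = \bar{y}_{\cdot\cdot\cdot}$, $\hat{\tau}_i = \bar{y}_{i\cdot\cdot} - \bar{y}_{\cdot\cdot\cdot}$, $\hat{\beta}_j = \bar{y}_{\cdot j\cdot} - \bar{y}_{\cdot\cdot\cdot}$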

# Open the reaction-time data in the data viewer. The data frame is named
# `rxntime`. View() needs an interactive session, so it is skipped when the
# report is knitted.
if (interactive()) {
  View(rxntime)
}

# Upper 5% critical value of the F distribution with 3 and 15 degrees of
# freedom.
qf(0.05, df1 = 3, df2 = 15, lower.tail = FALSE)

## [1] 3.287382

# Tukey pairwise comparisons of mean reaction time between treatments.
# Reaction times, in the order they are listed here.
rtime <- c(
  0.256, 0.281, 0.167, 0.258, 0.182, 0.283,
  0.257, 0.235, 0.204, 0.170, 0.260, 0.187,
  0.202, 0.279, 0.269, 0.198, 0.236, 0.181
)

# Treatment label of each observation, matched value by value against the
# treatment1 ... treatment6 vectors listed earlier in this report (all 18
# values are distinct). Cycling the labels 1, 2, 3 does not correspond to the
# treatments these times belong to.
power1 <- factor(c(6, 6, 2, 6, 2, 5, 4, 5, 1, 1, 5, 2, 3, 4, 4, 3, 3, 1))

# The numeric response goes on the left of the formula and the grouping
# factor on the right; a factor response makes aov() warn and TukeyHSD() fail.
rtime.aov <- aov(rtime ~ power1)

plot(TukeyHSD(rtime.aov))

# ANOVA of reaction time on treatment, adjusting for run order.
# The data frame read from reaction.time.txt is named `rxntime`. Trtmt is a
# numeric code, so it is converted to a factor to compare treatment means.
# NOTE(review): assumes `rxntime` has an `Order` column; confirm against the
# data file.
anova.fit <- aov(y ~ factor(Trtmt) + Order, data = rxntime)

summary(anova.fit)

Auditory and visual stimuli have no significant effect on response time.

# ANOVA of reaction time on factor B, adjusting for run order.
# The data frame read from reaction.time.txt is named `rxntime`.
# NOTE(review): assumes `rxntime` has columns `B` and `Order`, with `B` coded
# as level numbers; confirm against the data file.
anova.fit2 <- aov(y ~ factor(B) + Order, data = rxntime)

# Summarise the model fitted in this chunk (anova.fit2, not anova.fit).
summary(anova.fit2)

Elapsed time has no significant effect on response time

Sorry, I understand the material but I just was not able to code it

LabA

Becky Su

1/31/2019