# error=TRUE: a chunk that errors has its message printed in the rendered
# output (the `## Error ...` lines throughout) instead of aborting the knit.
knitr::opts_chunk$set(error=TRUE)
1a Response variable: presence of cancer and cancer recurrence in breast cancer patients with high insulin levels from being overweight
1b Factors: high insulin levels, and obesity
1c Replication could have been used, but not randomization, because the study targets a specific disease (breast cancer), so it wouldn’t be a completely randomized sample. Replication was in fact used: the experiment was conducted on a considerable number of women (512) over a span of 10 years, and it can be repeated over time.
2a
# `BMDdata` is not created anywhere else in this document, so read it here.
# This is the same file part 2g loads; its columns are BMD, treat and g.
BMDdata <- read.table(
  file.path("~", "Desktop", "CLASSES", "PSTAT122", "BMDdata.txt"),
  header = TRUE
)
boxplot(BMD~g, data=BMDdata, xlab="Treatment", ylab="Bone Mineral Density (BMD)", main="Kudzu Treatments on Bone Mineral Density")
# Mean BMD of the control group: the rows whose `treat` label is "control".
# (There is no separate `treat` object; `treat` is a column of the data.)
mean(BMDdata$BMD[BMDdata$treat == "control"])
2b Simple linear regression/One way ANOVA
2c
# `g` holds the numeric codes 1-3. Without as.factor() lm() would fit a single
# slope in g instead of one mean per treatment group (part 2g does the same
# conversion for aov()).
lm.BMD <- lm(BMD ~ as.factor(g), data = BMDdata)
anova(lm.BMD)
2d
summary(lm.BMD)
Because the p-value 0.00546 is less than α =0.05, we reject the null hypothesis. There is significant evidence that the groups differ with respect to mean BMD.
2e
# BMD measurements for the three treatment groups, 15 observations per group.
control <- c(
  0.228, 0.207, 0.234, 0.220, 0.217,
  0.228, 0.209, 0.221, 0.204, 0.220,
  0.203, 0.219, 0.218, 0.245, 0.210
)
lowdose <- c(
  0.211, 0.220, 0.221, 0.233, 0.219,
  0.233, 0.226, 0.228, 0.216, 0.225,
  0.200, 0.208, 0.198, 0.208, 0.203
)
hidose <- c(
  0.250, 0.237, 0.217, 0.206, 0.247,
  0.228, 0.245, 0.232, 0.267, 0.261,
  0.221, 0.219, 0.232, 0.209, 0.255
)
# One-sample t-test on the control group; the 95% interval it prints is the
# confidence interval for that group's mean BMD.
# NOTE(review): this uses only the control group's own variance (df = 14), not
# the pooled ANOVA MSE -- confirm that is what part 2e asks for.
t.test(control)
##
## One Sample t-test
##
## data: control
## t = 73.155, df = 14, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.2124498 0.2252835
## sample estimates:
## mean of x
## 0.2188667
We are 95% confident that the mean BMD for the control treatment group is between 0.2124498 and 0.2252835
# One-sample t-test on the low-dose group; the 95% interval it prints is the
# confidence interval for that group's mean BMD (own-group variance, df = 14).
t.test(lowdose)
##
## One Sample t-test
##
## data: lowdose
## t = 72.984, df = 14, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.2102348 0.2229652
## sample estimates:
## mean of x
## 0.2166
We are 95% confident that the mean BMD for the low dose treatment group is between 0.2102348 and 0.2229652.
# One-sample t-test on the high-dose group; the 95% interval it prints is the
# confidence interval for that group's mean BMD (own-group variance, df = 14).
t.test(hidose)
##
## One Sample t-test
##
## data: hidose
## t = 48.501, df = 14, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.2246716 0.2454617
## sample estimates:
## mean of x
## 0.2350667
We are 95% confident that the mean BMD for the high dose treatment group is between 0.2246716 and 0.2454617.
2f
# Difference between the average of the two kudzu treatments and the control.
# `lowdose + hidose` adds the vectors element by element, so its mean is the
# SUM of the two group means (about 0.45), not their average. Pooling the
# observations with c() gives the average over both treatment groups (the
# groups are the same size, so this equals the mean of the two group means).
treated <- c(lowdose, hidose)
avg.c <- mean(control)
avg.t <- mean(treated)
treatment.diff <- avg.t-avg.c
treatment.diff
# 99% Welch interval for the same quantity (treated minus control), so the
# sign of the interval agrees with treatment.diff.
t.test(treated, control, conf.level=0.99)
2g
# Read the BMD data set and print a per-column summary.
path <- file.path("~", "Desktop", "CLASSES", "PSTAT122", "BMDdata.txt")
# Spell out TRUE: `T` is an ordinary variable and can be reassigned.
bmd <- read.table(path, header = TRUE)
summary(bmd)
## BMD treat g
## Min. :0.1980 control:15 Min. :1
## 1st Qu.:0.2100 hidose :15 1st Qu.:1
## Median :0.2200 lowdose:15 Median :2
## Mean :0.2233 Mean :2
## 3rd Qu.:0.2320 3rd Qu.:3
## Max. :0.2670 Max. :3
# BMD values split by treatment code, kept for the contrast work that follows.
s <- split(bmd$BMD, as.factor(bmd$g))
# One-way ANOVA of BMD on the treatment code, with g converted to a factor.
bmdano <- aov(BMD ~ as.factor(g), data = bmd)
summary(bmdano)
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(g) 2 0.003186 0.0015928 7.718 0.0014 **
## Residuals 42 0.008668 0.0002064
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Bonferroni-adjusted confidence interval for the contrast (group 1 - group 2).
le <- tapply(bmd$BMD, bmd$g, length)        # observations per group
grp.means <- tapply(bmd$BMD, bmd$g, mean)   # sample mean per group
# The total sample size has to be stored: it was only printed before, so the
# later references to `n` failed with "object 'n' not found".
n <- length(bmd$BMD)
n
## [1] 45
v <- length(le)   # number of treatment groups
# NOTE(review): m = 3 is taken to be the number of simultaneous intervals
# being adjusted for -- confirm against the question.
m <- 3
# Residual mean square taken from the fitted ANOVA instead of being retyped
# from the printed table (0.0002064), which loses precision.
mse <- deviance(bmdano) / df.residual(bmdano)
coefs <- c(1, -1, 0)
est <- sum(coefs * grp.means)
# Standard error of a contrast: sqrt(MSE * sum(c_i^2 / r_i)). Both bounds must
# use it; the upper bound previously used sqrt(MSE) on its own.
se <- sqrt(mse * sum(coefs^2 / le))
crit <- qt(0.05 / (2 * m), n - v, lower.tail = FALSE)
bd <- est - crit * se
bu <- est + crit * se
print(paste('The CI is (',bd,",",bu,')'))
2h High dose of Kudzu
2i
# Diagnostic plots for the BMD model fitted in part 2c. The error recorded
# below is what this call gives when `lm.BMD` has not been created.
plot(lm.BMD)
## Error in plot(lm.BMD): object 'lm.BMD' not found
Assumptions violated: equal variance. Assumptions met: independence, linearity, normality.
2j
Use the MSE, plugging in the SSE.
3a A set of contrasts I’d find particularly interesting would compare the reaction times of the two cue types, auditory and visual, given the same elapsed time between cue and stimulus. For example, I’d compare the treatment combinations 1 & 4, 2 & 5, and 3 & 6.
3b
# Read the reaction-time data and fit reaction time (y) on treatment (Trtmt).
path <- file.path("~", "Desktop", "CLASSES", "PSTAT122", "reaction.time.txt")
rxntime <- read.table(path, header = TRUE)
# NOTE(review): the recorded summary of this fit shows a single `Trtmt` slope
# on 1 df, i.e. Trtmt is treated as a numeric predictor. To compare the six
# treatments as groups, use factor(Trtmt) -- confirm the intent.
rxn.lm <- lm(y~Trtmt, data=rxntime)
# The data frame is called `rxntime`; `fit` used to reference a
# `reaction_time` object that is never created. It is the same model, so keep
# the name as an alias.
fit <- rxn.lm
plot(rxn.lm)
3c
# Coefficient table, residual standard error and overall F test for rxn.lm.
summary(rxn.lm)
##
## Call:
## lm(formula = y ~ Trtmt, data = rxntime)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.031127 -0.018984 -0.003079 0.018706 0.040968
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.158222 0.012109 13.067 5.94e-10 ***
## Trtmt 0.019952 0.003109 6.417 8.52e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02253 on 16 degrees of freedom
## Multiple R-squared: 0.7202, Adjusted R-squared: 0.7027
## F-statistic: 41.18 on 1 and 16 DF, p-value: 8.522e-06
$y_{ij} = \mu + \tau_i + \varepsilon_{ij}$, SLR model. We use the t statistic. Since the p-value is small, we reject the null hypothesis and conclude that there is significant evidence that the treatments have different effects on the reaction time.
3d
# Estimate the parameters: the grand mean and each treatment's deviation
# from it.
treatment1 <- c(0.204, 0.170, 0.181)
treatment2 <- c(0.167, 0.182, 0.187)
treatment3 <- c(0.202, 0.198, 0.236)
treatment4 <- c(0.257, 0.279, 0.269)
treatment5 <- c(0.283, 0.235, 0.260)
treatment6 <- c(0.256, 0.281, 0.258)
# `y` was never defined; pool all 18 observations so the grand mean exists.
y <- c(treatment1, treatment2, treatment3, treatment4, treatment5, treatment6)
mean(y)
# `treatment1 + treatment` referred to an object that does not exist. It is
# replaced by the per-treatment means and their deviations from the grand
# mean (presumed intent -- confirm against the question).
trt.means <- vapply(
  list(treatment1, treatment2, treatment3, treatment4, treatment5, treatment6),
  mean,
  numeric(1)
)
names(trt.means) <- paste0("treatment", seq_along(trt.means))
trt.means
tau.hat <- trt.means - mean(y)
tau.hat
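$\hat{\mu} = \bar{y}_{\cdot\cdot\cdot}$, $\hat{\tau}_i = \bar{y}_{i\cdot\cdot} - \bar{y}_{\cdot\cdot\cdot}$, $\hat{\beta}_j = \bar{y}_{\cdot j\cdot} - \bar{y}_{\cdot\cdot\cdot}$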
3e
# View() was dropped: it is an interactive-only viewer and it was called on
# `reaction_time`, an object that is never created in this document.

# Upper 5% critical value of the F distribution on (3, 15) degrees of freedom.
# NOTE(review): confirm these df against the design (18 observations).
qf(0.05,3,15,lower.tail=FALSE)
## [1] 3.287382
# NOTE(review): the labels simply cycle "1","2","3" over the 18 observations,
# while `rtime` is not in the order of the treatment1..treatment6 vectors from
# part 3d -- confirm that each label lines up with the intended group.
power1 <- factor(rep(c("1", "2", "3"), times = 6))
rtime <- c(0.256,0.281, 0.167,0.258,0.182,0.283,0.257,0.235,0.204,0.170,0.260,0.187,0.202,0.279,0.269,0.198,0.236,0.181)
# The numeric response goes on the left of `~` and the grouping factor on the
# right. The formula was reversed (power1 ~ rtime), which asked aov() to model
# a factor response and made TukeyHSD() fail.
rtime.aov <- aov(rtime ~ power1)
plot(TukeyHSD(rtime.aov))
3f
3g
# The reaction-time data frame read in part 3b is `rxntime`; `reaction_time`
# is never created, which is why this fit could not run.
# NOTE(review): assumes the file has an `Order` column (not shown in this
# document) and uses Trtmt as read, i.e. numeric -- confirm both.
anova.fit <- aov(y ~ Trtmt + Order, data = rxntime)
summary(anova.fit)
Auditory and visual stimuli have no significant effect on response time.
# The reaction-time data frame read in part 3b is `rxntime`; `reaction_time`
# is never created.
# NOTE(review): assumes the file has `B` and `Order` columns (not shown in
# this document) -- confirm.
anova.fit2 <- aov(y ~ B + Order, data = rxntime)
# Summarise the model fitted on the line above; the call previously
# summarised `anova.fit` a second time instead.
summary(anova.fit2)
Elapsed time has no significant effect on response time
3h
3i
3j
Sorry, I understand the material but I just was not able to code it