set.seed(1968)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.0.6     ✓ dplyr   1.0.4
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(PropCIs)  #  from https://rcompanion.org/handbook/H_02.html

exercise 9.2.5

#To evaluate the policy of routine vaccination of infants for whooping cough, adverse reactions were monitored in 339 infants who received their first injection of vaccine. Reactions were noted in 69 of the infants.

#(a) Construct a 95% confidence interval for the probability of an adverse reaction to the vaccine.

scoreci(69, 339, conf.level = 0.95)  # scoreci(x, n, conf.level) Wilson’s confidence interval for a single proportion found at https://cran.r-project.org/web/packages/PropCIs/PropCIs.pdf
## 
## 
## 
## data:  
## 
## 95 percent confidence interval:
##  0.1641 0.2496

#(b) Interpret the confidence interval from part (a). What does the interval say about whooping cough vaccinations? #answer - we are 95% confident the probability of an adverse event during routine infant whooping cough vacccination lies in the interval 0.1641 to 0.2496.

#(c) Using your interval from part (a), can we be confident that the probability of an adverse reaction to the vaccine is less than 0.25? #answer - No, we are 95% confident Pr adverse vaccination reaction lies in the interval 0.2496 ~ 0.2496 but there remains a 5% chance that the assertion is incorrect implying that as more vaccinations are given over time that Pr of adverse reaction could approach .25 or 1/4 infant given a vaccine could experience a adverse reaction. #(d) What level of confidence is associated with your answer to part (c)? (Hint: What is the associated one sided interval confidence level?)

SE <- sqrt(0.2*(1-0.2)/343)
SE
## [1] 0.02159797
#upper limit of CI p_tidle + 1.645 * SE(p_tilde)
upperlimit <- (0.2+(1.645*0.02159))
upperlimit
## [1] 0.2355156

#answer - calculating the one side CI we note that upper limit is 0.23 therefore we can be 95% confident that adverse reaction proportion is less than 0.25.

exercise 9.4.4

#At a Midwestern hospital there were a total of 932 births in 20 consecutive weeks. Of these births, 216 occurred on weekends. Do these data reveal more than chance deviation from random timing of the births? Consider a goodness of fit test with two categories of births—weekday and weekend—using a nondirectional alternative.

null.probs= c(5/7, 2/7)
observebirths = c(716, 216)
chisq.test(observebirths, p=null.probs)
## 
##  Chi-squared test for given probabilities
## 
## data:  observebirths
## X-squared = 13.294, df = 1, p-value = 0.0002662

#(a) What is the value of the x2 test statistic? #answer - the x^2 statitistic is 268.24 with df = 1

#(b) In the context of this study, state the null and alternative hypotheses. #answer - H0: there is no difference in proportions for newborn birth on weekday vs weekend at a midwestern hospital; HA: there is a difference in proportions for newborn births weekday vs weekends at a midwestern hospital

#(c) The P-value for the test is 0.0003. If a = 0.05, what is your conclusion regarding the hypotheses in (b)? #answer - reject the null, there is strong evidence to support that there is a difference in newborn births at a midwestern hospital on weekdays vs weekends.

exercise 9.4.5

#In a breeding experiment, white chickens with small combs were mated and produced 190 offspring of the types shown in the accompanying table.27 Are these data consistent with the Mendelian expected ratios of 9:3:3:1 for the four types? Use a chi-square test at a = 0.10.

null.probs= c(9/16, 3/16, 3/16, 1/16)
observechicken = c(111,37,34,8)
chisq.test(observechicken, p=null.probs)
## 
##  Chi-squared test for given probabilities
## 
## data:  observechicken
## X-squared = 1.5509, df = 3, p-value = 0.6706

#answer fail to reject H0; there is insufficient evidence at the 0.1 significance level (p = 0.6706) that chicken feather color and comb size are dependent variables in a chicken breeding experiment. Further the results suggest that observed proportion do not follow the expected mendelian genetic proportion of 9:3:3:1.

10.2.5

#Can attack of a plant by one organism induce resistance to subsequent attack by a different organism? In a study of this question, individually potted cotton (Gossypium) plants were randomly allocated to two groups. Each plant in one group received an infestation of spider mites (Tetranychus); the other group were kept as controls. After 2 weeks the mites were removed, and all plants were inoculated with Verticillium, a fungus that causes wilt disease. The accompanying table shows the numbers of plants that developed symptoms of wilt disease. Do the data provide sufficient evidence to conclude that infestation with mites induces resistance to wilt disease? Use a chi-square test against a directional alternative following the five steps (a–e) in Exercise 10.2.4. Let a = 0.01.

cottonmatrix<-matrix(c(11,17,15,4),ncol=2,byrow=TRUE)
rownames(cottonmatrix)<-c("wilt","no_wilt")
colnames(cottonmatrix)<-c("mites","control")
cottonmatrix <- as.table(cottonmatrix)
cottonmatrix
##         mites control
## wilt       11      17
## no_wilt    15       4
#1 H0:  rate of wilting of Gossypium at 2 weeks is independent of fungal infection and mite infestation, HA: rate of wilting of Gossypium at 2 weeks is dependent on fungal infection and mite infestation, alpha = 0.01
#2 state and check conditions - a random sample of plants was used in the experiment, cell frequencies are >= 5 except one
chisq.test(cottonmatrix)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  cottonmatrix
## X-squared = 5.6885, df = 1, p-value = 0.01708
#since the question instructs to perform a directional test, we multiple the p value 0.0178 by 1/2 
direction <- (0.0178*0.5)
direction
## [1] 0.0089

#answer - the new p = 0.0089. We interpret this chi square test as follows: reject H0, there is sufficent evidence at the 0.01 level (p = 0.0089) to claim that fungal induced wilting disease at 2 weeks and prior mite infestation are dependent variables in the experiment involving Gossypium.

10.2.7

#Phenytoin is a standard anticonvulsant drug that unfortunately has many toxic side effects. A study was undertaken to compare phenytoin with valproate, another drug in the treatment of epilepsy. Patients were randomly allocated to receive either phenytoin or valproate for 12 months. Of 20 patients receiving valproate, 6 were free of seizures for the 12 months, while 6 of 17 patients receiving phenytoin were seizure free.

drugmatrix<-matrix(c(11,6,14,6),ncol=2,byrow=TRUE)
rownames(drugmatrix)<-c("phenytoin","valproate")
colnames(drugmatrix)<-c("seizure","no_seizure")
drugmatrix <- as.table(drugmatrix)
drugmatrix
##           seizure no_seizure
## phenytoin      11          6
## valproate      14          6

#(a) Consider a chi-square test to compare the seizure-free response rates for the two drugs using a nondirectional alternative.

#(i) State the null and alternative hypotheses in symbols. #H0: drug type and seizure rate are not different 12 months ; HA:drug type and seizure rates are different at 12 months, alpha = 0.01 #2 state and check conditions - a random sample of patients was used in the experiment, cell frequencies are >= 5

#(ii) What is the value of the test statistic?

chisq.test(drugmatrix)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  drugmatrix
## X-squared = 2.6469e-31, df = 1, p-value = 1

#answer the test statistic is X-squared = 2.6469e-31

#(iii) The P-value for the test is 0.73. If a = 0.10, what is your conclusion regarding the hypotheses in (ii)?

#answer - fail to reject the H0, there is insufficient evidence that 12 month seizure rates are different whether taking the drug phenytoin or valproate

#(b) Do your conclusions in part (a)(iii) provide evidence that valproate and phenytoin are equally effective in preventing seizures? Discuss.

#answer - the result of test suggest noninferiority between valproate and phenytoin in terms of the drugs ability to prevent seizures at 12 months. therefore the clinician might choose other variable in deciding which drug to prescribe to prevent seizure such as side effect profile or cost

exercise 10.3.3

#Men with prostate cancer were randomly assigned to undergo surgery (n = 347) or “watchful waiting” (no surgery, n = 348). Over the next several years there were 83 deaths in the first group and 106 deaths in the second group. The results are given in the table.

prostatematrix<-matrix(c(83,106,264,242),ncol=2,byrow=TRUE)
rownames(prostatematrix)<-c("Died","Alive")
colnames(prostatematrix)<-c("Surgery","WW")
prostatematrix <- as.table(prostatematrix)
prostatematrix
##       Surgery  WW
## Died       83 106
## Alive     264 242

#(a) Let D and A represent died and alive, respectively, and let S and WW represent surgery and watchful waiting. Calculate Pr{D|S} and Pr{D|WW}.

Prdiedsurg <- (83/(83+264))  #pr died given surgery 
Prdiedsurg
## [1] 0.2391931
Prdiedww <- (106/(106+242))
Prdiedww
## [1] 0.3045977

#(b) The value of the contingency-table chi-square statistic for these data is chi-square = 3.75. Test for a relationship between the treatment and survival. Use a nondirectional alternative and let a = 0.05.

#1 H0: death rates from prostate cancer are not related to treatment strategy surgery vs watchful waiting, HA: death rates from prostate cancer are related to treatment strategy surgery vs watchful waiting, alpha = 0.05
#2 state and check conditions - a random sample of patients was chosen in the study, cell frequencies are >= 5
chisq.test(prostatematrix)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  prostatematrix
## X-squared = 3.431, df = 1, p-value = 0.06399

#Answer - fail to reject H0; there is insufficient evidence to show that death rates are related to the type of treatment strategy surgery vs watchful waiting

exercise 10.9.1

#For each of the following tables, calculate (i) the relative risk and (ii) the odds ratio

table_a <- matrix(c(25,23,492,614),byrow=TRUE,nrow=2) # table a)
table_a
##      [,1] [,2]
## [1,]   25   23
## [2,]  492  614
#odd ratio for 2x2 table = (n11*n22)/(n12*n21)
odds_a <- (25*614)/(23*492)
odds_a
## [1] 1.356486
#relative risk for 2x2 table:  p_hat1 = Pr{a|a+c}
p_hat1a<- (25/(25+492))
p_hat1a
## [1] 0.0483559
p_hat2a <- (23/(23+614))   #p_hat2 = Pr{b|c+d}
p_hat2a
## [1] 0.03610675
table_b <- matrix(c(12,8,93,84),byrow=TRUE,nrow=2)  # table b)
table_b
##      [,1] [,2]
## [1,]   12    8
## [2,]   93   84
#odd ratio for 2x2 table = (n11*n22)/(n12*n21)
odds_b <- (12*84)/(93*8)
odds_b
## [1] 1.354839
#relative risk for 2x2 table:  p_hat1 = Pr{a|a+c}
p_hat1b<- (12/(12+93))
p_hat1b
## [1] 0.1142857
p_hat2b <- (8/(8+84))   #p_hat2 = Pr{b|c+d}
p_hat2b
## [1] 0.08695652

exercise 10.9.3

#Hip dysplasia is a hip socket abnormality that affects many large breed dogs. A review of medical records of dogs seen at 27 veterinary medical teaching hospitals found that hip dysplasia was more common in Golden Retrievers than in Border Collies; the data are shown in the following table. Calculate the relative risk of hip dysplasia for Golden Retrievers compared to Border Collies.

dogmatrix<-matrix(c(3995,221,42946,5007),ncol=2,byrow=TRUE)
rownames(dogmatrix)<-c("+hip dysplasia","-hip dysplasia")
colnames(dogmatrix)<-c("golden ret","border coll")
dogmatrix <- as.table(dogmatrix)
dogmatrix
##                golden ret border coll
## +hip dysplasia       3995         221
## -hip dysplasia      42946        5007
# RR is calculate pr_+HPgolden/pr_+HPborder
RR_GRvsBC <- (3995/(3995+42946))/(221/(221+5007))
RR_GRvsBC
## [1] 2.013297

#answer - the estimated relative risk of developing hip dyplasia is 2.0 times greater for golden retriever dogs vs. border collies.

exercise 10.9.4

#Consider the data from Exercise 10.9.3.

#(a) Calculate the sample value of the odds ratio.

#odd ratio for 2x2 table = (n11*n22)/(n12*n21)
odds_hipdysplasia <- (3995*5007)/(221*42946)
odds_hipdysplasia
## [1] 2.107557

#(b) Construct a 95% confidence interval for the population value of the odds ratio

#SE_ln_theta = sqrt(1/n11+1/n12+1/n21+1/n22)
SE_theta_hipdys <-sqrt(1/3995+1/221+1/42946+1/5007)
SE_theta_hipdys
## [1] 0.07069799
#CI theta_hat  = ln(theta_hat) +/-1.96*ln_SE_theta
CI_ln_theta_hat_upper <- log(odds_hipdysplasia)+1.96*SE_theta_hipdys
CI_ln_theta_hat_lower <- log(odds_hipdysplasia)-1.96*SE_theta_hipdys
CI_theta_lower <-2.718^CI_ln_theta_hat_lower
CI_theta_lower
## [1] 1.834732
CI_theta_upper <-2.718^CI_ln_theta_hat_upper
CI_theta_upper
## [1] 2.420577

#answer - the 95% CI theta for dog hip dysplasia is 1.834 to 2.421.

#(c) Interpret the confidence interval from part (b) in the context of this setting.

#answer - we are 95% confident that the population value of the odds ratio is between 1.834 to 2.421.