##Week 1

Significance test about a binomial proportion Example: 2016 General Social Survey

prop.test(x=837,n=1810, p=0.5, alternative ="two.sided" ,correct = FALSE) #"less", "greater
## 
##  1-sample proportions test without continuity correction
## 
## data:  837 out of 1810, null probability 0.5
## X-squared = 10.219, df = 1, p-value = 0.00139
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
##  0.4395653 0.4854557
## sample estimates:
##         p 
## 0.4624309
#we could add "conf.level = 0.95"

#confidence interval

Example, for n=10 and y=9, construct a 95% confidence interval using * Wilson’s method * The traditional methods

library(PropCIs)
scoreci(x=9, n=10, conf.level = 0.95) #Wilson's confidence interval 
## 
## 
## 
## data:  
## 
## 95 percent confidence interval:
##  0.5958 0.9821

Agresti-Coull

##Week 2

#Small sample binomial inference Video 2 Suppose Ho: pi=0.5 vs Ha: pi>0.5 y=9, n=10

library(PropCIs)
library(stats)
#It can be expressed as the sum of those two terms
dbinom(x=9,size=10,prob=0.5)+ dbinom(10,10,0.5)
## [1] 0.01074219
dbinom(x=0,size=30,prob=.30)+ dbinom(x=1,size=30,prob=.30)
## [1] 0.0003123309
dnorm(9,6.3,)
## [1] 0.01042093

#Small-sample discrete Inference is conservative Ho:pi=0.5 vs Ha: pi is not equal to 0.5 n=25

This is a two sided hypothesis so we need a two sided p-value

#If we observe y=7
2*pbinom(7,25,0.5)
## [1] 0.04328525
#If we observe y=8
2*pbinom(8,25,0.5)
## [1] 0.1077521

Properties fo Mid P-values (slide 39)

#py represents the pmf
#mpval is the mid p-value

#sum(py*mpval) #By doing this calculation, we are calculating the expectation of the mid p-value and it's equal to 0.5 

Video 3 The posterior 95% equal-tail credible interval alpha* = y + alpha= 837.5 beta* = n-y+beta = 973.5

library(stats)

round(qbeta(0.25, 837.5, 973.5), 3)#0.25 quartile gives the upper limit
## [1] 0.455
round(qbeta(0.975, 837.5, 973.5), 3)#0.75 gives the lower limit
## [1] 0.485

##Week 3
Video 2 Significance test for comparing two proportions

Example: Success = having heart attack p1=189/11034 (having heart attack in the placebo group) p2=104/11037 (having heart attack in the aspirin group)

prop.test(c(189,104), c(11034,11037), alternative = "two.sided", correct=FALSE)
## 
##  2-sample test for equality of proportions without continuity
##  correction
## 
## data:  c(189, 104) out of c(11034, 11037)
## X-squared = 25.014, df = 1, p-value = 5.692e-07
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  0.004687751 0.010724297
## sample estimates:
##     prop 1     prop 2 
## 0.01712887 0.00942285

Using another package.It uses binomial success counts and n for each group

library(PropCIs)

diffscoreci(189,11034, 104, 11037, conf.level = 0.95)
## 
## 
## 
## data:  
## 
## 95 percent confidence interval:
##  0.004716821 0.010788501

Video3 Relative risk

library(PropCIs)
riskscoreci(189,11034, 104, 11037,0.95)
## 
## 
## 
## data:  
## 
## 95 percent confidence interval:
##  1.433904 2.304713
#The risk of MI is at least 43% higher for the placebo group by looking at the lower limit f the CI
library(epitools)
oddsratio(c(189,11034, 104, 11037), method = "wald",
          conf=0.95,correct=FALSE)
## $data
##           Outcome
## Predictor  Disease1 Disease2 Total
##   Exposed1      189    11034 11223
##   Exposed2      104    11037 11141
##   Total         293    22071 22364
## 
## $measure
##           odds ratio with 95% C.I.
## Predictor  estimate    lower    upper
##   Exposed1 1.000000       NA       NA
##   Exposed2 1.817802 1.428868 2.312603
## 
## $p.value
##           two-sided
## Predictor    midp.exact fisher.exact   chi.square
##   Exposed1           NA           NA           NA
##   Exposed2 7.003039e-07 8.978571e-07 7.996161e-07
## 
## $correction
## [1] FALSE
## 
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# a95% CI is just the exp of the lower limit and exp of the upper limit
#this confidence interval doesn't contain 1, so the true odds of MI are different for the two groups
#The odds of MI for the placebo group are at least 44% higher for the aspirin group

For small samples, we use the score confidence interval

library(PropCIs)
orscoreci(189, 11034,104,11037, 0.95)
## 
## 
## 
## data:  
## 
## 95 percent confidence interval:
##  1.440802 2.329551

##Week 4 *Video 1

Example: Gender gap inpoliical affiliation

Political<-read.table("http://www.stat.ufl.edu/~aa/cat/data/Political.dat", header = TRUE)
head(Political)
##   person gender party
## 1      1 female   Dem
## 2      2 female   Dem
## 3      3 female   Dem
## 4      4 female   Dem
## 5      5 female   Dem
## 6      6 female   Dem
#Specify preferred order
Party<-factor(Political$party,levels = c("Dem", "Rep", "Ind"))
#categories for displays
GenderGap<-xtabs(~gender+Party, data = Political)
#forms contingency table
GenderGap
##         Party
## gender   Dem Rep Ind
##   female 495 272 590
##   male   330 265 498
margin.table(GenderGap,1) #gender
## gender
## female   male 
##   1357   1093
margin.table(GenderGap,2) #party affiliation
## Party
##  Dem  Rep  Ind 
##  825  537 1088
summary(GenderGap) #this summary would give us the test of independence 
## Call: xtabs(formula = ~gender + Party, data = Political)
## Number of cases in table: 2450 
## Number of factors: 2 
## Test for independence of all factors:
##  Chisq = 12.569, df = 2, p-value = 0.001865

Another way of doing the same example If we just have he total counts

F<-c(495,272,590)
M<-c(330,265,498)
gender.gap<-as.data.frame(rbind(F,M))
names(gender.gap)<-c("Dem", "Rep", "Ind")
gender.gap
##   Dem Rep Ind
## F 495 272 590
## M 330 265 498
fit<-chisq.test(gender.gap); fit
## 
##  Pearson's Chi-squared test
## 
## data:  gender.gap
## X-squared = 12.569, df = 2, p-value = 0.001865
round(fit$expected,1) #expected values for each cell
##     Dem   Rep   Ind
## F 456.9 297.4 602.6
## M 368.1 239.6 485.4
round(fit$stdres,2) #Standardized residuals
##     Dem  Rep   Ind
## F  3.27 -2.5 -1.03
## M -3.27  2.5  1.03

Video 3

#2x3 table  summing Dem+Rep
gender.gap1<-gender.gap[,-c(3)]
gender.gap1
##   Dem Rep
## F 495 272
## M 330 265
'Dem+Rep'<-gender.gap$Dem + gender.gap$Rep
`Dem+Rep`
## [1] 767 595
gender.gap2<-as.data.frame(cbind(`Dem+Rep`,gender.gap$Ind)) #Combine by column
names(gender.gap2)<-c("Rep+Dem","Ind")
rownames(gender.gap2)<-c("F", "M")
gender.gap2
##   Rep+Dem Ind
## F     767 590
## M     595 498
library(DescTools)

GTest(gender.gap) #Full table
## 
##  Log likelihood ratio (G-test) test of independence without correction
## 
## data:  gender.gap
## G = 12.601, X-squared df = 2, p-value = 0.001835
#There is strong evidence to say there is difference b/t females and males 

GTest(gender.gap1) #table with republicans and democrats alone
## 
##  Log likelihood ratio (G-test) test of independence without correction
## 
## data:  gender.gap1
## G = 11.536, X-squared df = 1, p-value = 0.0006827
#G^2= 11.536

GTest(gender.gap2) #table with columns Dem+Rep, Ind
## 
##  Log likelihood ratio (G-test) test of independence without correction
## 
## data:  gender.gap2
## G = 1.0652, X-squared df = 1, p-value = 0.302

Example: Alcohol use and Infant Malformation Explanatory: alcohol consumption (ordinal)-ranges/reponse levels are a bit subjective Response: Malformation (nominal)

c1<-c(17066,48)
c2<-c(14464, 38)
c3<-c(788, 5 )
c4<-c(126, 1)
c5<-c(37, 1)
alc<-as.data.frame(rbind(c1, c2,c3,c4,c5))
names(alc)<-c("Absent", "Present") 
alc 
##    Absent Present
## c1  17066      48
## c2  14464      38
## c3    788       5
## c4    126       1
## c5     37       1
col.sums<-colSums(alc)
row.sums<-rowSums(alc)
tot<-sum(col.sums)
row.probs<-row.sums/tot
col.probs<-col.sums/tot
                   
round(row.probs,3)
##    c1    c2    c3    c4    c5 
## 0.525 0.445 0.024 0.004 0.001
round(col.probs,3)
##  Absent Present 
##   0.997   0.003
u<-c(0, 0.5, 1.5, 4,7)
v<-c(0,1)

u.bar<-sum(row.probs*u)
v.bar<-sum(col.probs*v)

probs<-alc/tot

den<-sqrt(sum((u-u.bar)^2)*row.probs)*sum((v-v.bar)^2*col.probs)

num<-0
for (i in 1:5){
  for (j in 1:2){
    num<-num+(u[i]-u.bar)*(v[j]-v.bar)*probs[i,j]
  }
}
                   
num/den    
##         c1         c2         c3         c4         c5 
## 0.02139913 0.02324652 0.09941118 0.24841052 0.45412984
round(1-pchisq(6.57,1),3) #strong evidence of a nonzero correlation 
## [1] 0.01

We can do the sameby using pakage

library(vcdExtra)
## Loading required package: vcd
## Loading required package: grid
## 
## Attaching package: 'vcd'
## The following object is masked from 'package:epitools':
## 
##     oddsratio
## Loading required package: gnm
## 
## Attaching package: 'vcdExtra'
## The following object is masked from 'package:epitools':
## 
##     expand.table
Malform<-matrix(c(17066, 14464,788,126,37,48,38,5,1,1), ncol = 2)
CMHtest(Malform, rscores = c(0,0.5,1.5,4,7))
## Cochran-Mantel-Haenszel Statistics 
## 
##                  AltHypothesis   Chisq Df     Prob
## cor        Nonzero correlation  6.5699  1 0.010372
## rmeans  Row mean scores differ 12.0817  4 0.016754
## cmeans  Col mean scores differ  6.5699  1 0.010372
## general    General association 12.0817  4 0.016754

#Week 5 Video 1 Showed Fisher test ouput for the tea example but did not

Video 2 Tea example

library(epitools)
ormidp.test(3,1,1,3, or=1) #enter the four cell counts
##   one.sided two.sided
## 1 0.1285714 0.2571429
#We see the observed p-values for one and two-sided
library(exact2x2)
## Loading required package: exactci
## Loading required package: ssanv
## Loading required package: testthat
## 
## Attaching package: 'exactci'
## The following object is masked from 'package:epitools':
## 
##     binom.exact
#taster
#fisher.exact(taster)

#this is a useless confidence interval because it is too wide
library(PropCIs)
#posteriorintervval for odds ratio
orci.bayes(11,11,0,1,0.5, 0.5, 0.5, 0.5,0.95, nsim = 1e+06)
## [1] 3.276438e+00 1.361274e+06
#confidence interval for thedifferencebetween theproportions 
diffci.bayes(11,11,0,1,0.5, 0.5, 0.5, 0.5,0.95, nsim = 1e+06)
## [1] 0.09899729 0.99327276
library(stats)
pi1=rbeta(1e+8, 11.5, 0.5)
pi2=rbeta(1e+8, 0.5, 1.5)
or<-pi1*(1-pi2)/((1-pi1)*pi2) #Find odds ratio
quantile(or, c(0.25, 0.975))
##          25%        97.5% 
## 5.805198e+01 1.361927e+06
mean(pi1<pi2)
## [1] 0.0058584
dbinom(x=8,size=10,prob=0.5)+dbinom(x=9,size=10,prob=0.5)+ dbinom(10,10,0.5)
## [1] 0.0546875
x=(0.5*(dbinom(x=8,size=10,prob=0.5)))+ (1-(dbinom(x=8,size=10,prob=0.5)+dbinom(x=9,size=10,prob=0.5)+ dbinom(10,10,0.5)))
y=(0.5*(dbinom(x=8,size=10,prob=0.5)))+ ((dbinom(x=8,size=10,prob=0.5)+dbinom(x=9,size=10,prob=0.5)+ dbinom(10,10,0.5)))
y
## [1] 0.07666016
x+y
## [1] 1.043945
dbinom(x=9,size=10,prob=0.5)+ dbinom(10,10,0.5)
## [1] 0.01074219

Week 2, video 2

Week 3, video 3. ## ODDS RATIO large samples

library(epitools)
dat<-matrix(c(189, 10845, 104, 10933),2,2, byrow = TRUE)
dat
##      [,1]  [,2]
## [1,]  189 10845
## [2,]  104 10933
#odds
#rev = c("neither", "rows", "columns", "both")
#oddsratio(x=, y=, method = , conf.level=0.95, correction = FALSE)
#oddsratio(dat, method = "wald", conf.level=0.95, correction=FALSE)

#Interpretation
#This CI doesn't contain 1, thus the true odds of MI are different for the two groups
#The odds of MI for the placebo group are at least 44% HIGHER than for Aspiring group

ODDS RATIO small samples

#proportion 1 & proportion 2 are close to 0 or 1.
orscoreci(189, 11034, 104, 11037, 0.95)
## 
## 
## 
## data:  
## 
## 95 percent confidence interval:
##  1.440802 2.329551

Week 4, video 1 ##Example Gendergap in political affiliation

Political<-read.table("http://www.stat.ufl.edu/~aa/cat/data/Political.dat", header=TRUE)
head(Political)
##   person gender party
## 1      1 female   Dem
## 2      2 female   Dem
## 3      3 female   Dem
## 4      4 female   Dem
## 5      5 female   Dem
## 6      6 female   Dem
Party<-factor(Political$party, levels=c("Dem", "Rep", "Ind")) #level specifies preferred order of categories for displays

GenderGap<-xtabs(~gender + Party, data=Political)
#forms of contengency table
GenderGap
##         Party
## gender   Dem Rep Ind
##   female 495 272 590
##   male   330 265 498
margin.table(GenderGap,1)
## gender
## female   male 
##   1357   1093
margin.table(GenderGap,2)
## Party
##  Dem  Rep  Ind 
##  825  537 1088
summary(GenderGap)
## Call: xtabs(formula = ~gender + Party, data = Political)
## Number of cases in table: 2450 
## Number of factors: 2 
## Test for independence of all factors:
##  Chisq = 12.569, df = 2, p-value = 0.001865

Another way of creating contengency tables

F<-c(495, 272, 590)
M<-c(330, 265, 498)
gender.gap<-as.data.frame(rbind(F,M))
gender.gap
##    V1  V2  V3
## F 495 272 590
## M 330 265 498
names(gender.gap)<-c("Dem", "Rep", "Ind")
fit<-chisq.test(gender.gap)
fit
## 
##  Pearson's Chi-squared test
## 
## data:  gender.gap
## X-squared = 12.569, df = 2, p-value = 0.001865
stdres <- chisq.test(GenderGap)$stdres # standardized residuals
stdres
##         Party
## gender         Dem       Rep       Ind
##   female  3.272365 -2.498557 -1.032199
##   male   -3.272365  2.498557  1.032199
library(vcd)
#mosaic(diag, gp=shading_Friendly, residuals=stdres, # mosaic plot
#+ residuals_type="Std\nresiduals", labeling=labeling_residuals)

Standardized residuals

round(fit$stdres,2)
##     Dem  Rep   Ind
## F  3.27 -2.5 -1.03
## M -3.27  2.5  1.03

#Week 4, video 2

library(DescTools)
GTest(gender.gap1)
## 
##  Log likelihood ratio (G-test) test of independence without correction
## 
## data:  gender.gap1
## G = 11.536, X-squared df = 1, p-value = 0.0006827
GTest(gender.gap2)
## 
##  Log likelihood ratio (G-test) test of independence without correction
## 
## data:  gender.gap2
## G = 1.0652, X-squared df = 1, p-value = 0.302

#week 5,video,5

#cases<-data.frame(victim=factor(c(rep("W",515),rep("B",159)),levels = c("W", "B")), Defendant=factor(c(rep("W",515),rep("B",159)),levels = c("W", "B"))

#ctable<-xtabs(~victim+Defendant+y,data = cases)