jp2 <- read.csv(‘https://raw.githubusercontent.com/taragonmd/data/master/drugrx-pearl2.csv’) str(jp2)

#Exercise 3.1

pR1.D0 <- (234+55)/(270+80)
pR1.D1 <- (81+192)/(87+263)
pR1.D0Gm <- 234/270
pR1.D1Gm <- 81/87
pR1.D0Gw <- 55/80
pR1.D1Gw <- 192/263
jp2 <- 
read.csv('https://raw.githubusercontent.com/taragonmd/data/master/drugrx-pearl2.csv')
str(jp2)
## 'data.frame':    700 obs. of  4 variables:
##  $ X        : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Recovered: Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Drug     : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Gender   : Factor w/ 2 levels "Men","Women": 1 1 1 1 1 1 1 1 1 1 ...
tab2.rdg <- xtabs(~ Recovered + Drug + Gender, data = jp2)
tab2.rd <- apply(tab2.rdg, c(1,2), sum)
pR1.D0 <- tab2.rd['Yes','No']/sum(tab2.rd[,'No'])
pR1.D1 <- tab2.rd['Yes','Yes']/sum(tab2.rd[,'Yes'])
pR1.D0Gm <- tab2.rdg['Yes','No','Men']/sum(tab2.rdg[,'No','Men'])
pR1.D1Gm <- tab2.rdg['Yes','Yes','Men']/sum(tab2.rdg[,'Yes','Men'])
pR1.D0Gw <- tab2.rdg['Yes','No','Women']/sum(tab2.rdg[,'No','Women'])
pR1.D1Gw <- tab2.rdg['Yes','Yes','Women']/sum(tab2.rdg[,'Yes','Women'])
prop.table(tab2.rd, 2)
##          Drug
## Recovered        No  Yes
##       No  0.1742857 0.22
##       Yes 0.8257143 0.78
prop.table(tab2.rdg, 2:3)
## , , Gender = Men
## 
##          Drug
## Recovered         No        Yes
##       No  0.13333333 0.06896552
##       Yes 0.86666667 0.93103448
## 
## , , Gender = Women
## 
##          Drug
## Recovered         No        Yes
##       No  0.31250000 0.26996198
##       Yes 0.68750000 0.73003802

#Exercise 3.2

#Ex. 3.2a

std89c <- 
read.csv("https://raw.githubusercontent.com/taragonmd/data/master/syphilis89c.csv")
str(std89c)
## 'data.frame':    44081 obs. of  3 variables:
##  $ Sex : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Race: Factor w/ 3 levels "Black","Other",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ Age : Factor w/ 8 levels "<=14",">55","15-19",..: 1 1 3 3 3 3 3 3 3 3 ...
head(std89c)
##    Sex  Race   Age
## 1 Male White  <=14
## 2 Male White  <=14
## 3 Male White 15-19
## 4 Male White 15-19
## 5 Male White 15-19
## 6 Male White 15-19
lapply(std89c, table)
## $Sex
## 
## Female   Male 
##  18075  26006 
## 
## $Race
## 
## Black Other White 
## 35508  3956  4617 
## 
## $Age
## 
##  <=14   >55 15-19 20-24 25-29 30-34 35-44 45-54 
##   230  1278  4378 10405  9610  8648  6901  2631

#Ex 3.2b

std89c<- read.csv(“https://raw.githubusercontent.com/taragonmd/data/master/syphilis89c.csv”, as.is = c(FALSE, FALSE, TRUE))

str(std89c)
## 'data.frame':    44081 obs. of  3 variables:
##  $ Sex : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Race: Factor w/ 3 levels "Black","Other",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ Age : Factor w/ 8 levels "<=14",">55","15-19",..: 1 1 3 3 3 3 3 3 3 3 ...
table(std89c$Age)
## 
##  <=14   >55 15-19 20-24 25-29 30-34 35-44 45-54 
##   230  1278  4378 10405  9610  8648  6901  2631
agelab<-c("<=14","15-19","20-24","25-29","30-34","35-44","45-54",">55")
std89c$Age <- factor(std89c$Age, levels = agelab)
table(std89c$Age)
## 
##  <=14 15-19 20-24 25-29 30-34 35-44 45-54   >55 
##   230  4378 10405  9610  8648  6901  2631  1278

#Ex 3.2c

table(std89c$Race, std89c$Age, std89c$Sex)
## , ,  = Female
## 
##        
##         <=14 15-19 20-24 25-29 30-34 35-44 45-54  >55
##   Black  165  2257  4503  3590  2628  1505   392   92
##   Other   11   158   307   283   167   149    40   15
##   White   14   253   475   433   316   243    55   24
## 
## , ,  = Male
## 
##        
##         <=14 15-19 20-24 25-29 30-34 35-44 45-54  >55
##   Black   31  1412  4059  4121  4453  3858  1619  823
##   Other    7   210   654   633   520   492   202  108
##   White    2    88   407   550   564   654   323  216
xtabs(~ Race + Age + Sex, data = std89c)
## , , Sex = Female
## 
##        Age
## Race    <=14 15-19 20-24 25-29 30-34 35-44 45-54  >55
##   Black  165  2257  4503  3590  2628  1505   392   92
##   Other   11   158   307   283   167   149    40   15
##   White   14   253   475   433   316   243    55   24
## 
## , , Sex = Male
## 
##        Age
## Race    <=14 15-19 20-24 25-29 30-34 35-44 45-54  >55
##   Black   31  1412  4059  4121  4453  3858  1619  823
##   Other    7   210   654   633   520   492   202  108
##   White    2    88   407   550   564   654   323  216
attach(std89c)
table(Race, Age, Sex)
## , , Sex = Female
## 
##        Age
## Race    <=14 15-19 20-24 25-29 30-34 35-44 45-54  >55
##   Black  165  2257  4503  3590  2628  1505   392   92
##   Other   11   158   307   283   167   149    40   15
##   White   14   253   475   433   316   243    55   24
## 
## , , Sex = Male
## 
##        Age
## Race    <=14 15-19 20-24 25-29 30-34 35-44 45-54  >55
##   Black   31  1412  4059  4121  4453  3858  1619  823
##   Other    7   210   654   633   520   492   202  108
##   White    2    88   407   550   564   654   323  216
xtabs(~ Race + Age + Sex)
## , , Sex = Female
## 
##        Age
## Race    <=14 15-19 20-24 25-29 30-34 35-44 45-54  >55
##   Black  165  2257  4503  3590  2628  1505   392   92
##   Other   11   158   307   283   167   149    40   15
##   White   14   253   475   433   316   243    55   24
## 
## , , Sex = Male
## 
##        Age
## Race    <=14 15-19 20-24 25-29 30-34 35-44 45-54  >55
##   Black   31  1412  4059  4121  4453  3858  1619  823
##   Other    7   210   654   633   520   492   202  108
##   White    2    88   407   550   564   654   323  216

#Exercise 3.3

tab.ars <- xtabs(~ Age + Race + Sex, data = std89c)
# 2-D tables
tab.ar <- apply(tab.ars, c(1, 2), sum); tab.ar
##        Race
## Age     Black Other White
##   <=14    196    18    16
##   15-19  3669   368   341
##   20-24  8562   961   882
##   25-29  7711   916   983
##   30-34  7081   687   880
##   35-44  5363   641   897
##   45-54  2011   242   378
##   >55     915   123   240
tab.as <- apply(tab.ars, c(1, 3), sum); tab.as
##        Sex
## Age     Female Male
##   <=14     190   40
##   15-19   2668 1710
##   20-24   5285 5120
##   25-29   4306 5304
##   30-34   3111 5537
##   35-44   1897 5004
##   45-54    487 2144
##   >55      131 1147
tab.rs <- apply(tab.ars, c(2, 3), sum); tab.rs
##        Sex
## Race    Female  Male
##   Black  15132 20376
##   Other   1130  2826
##   White   1813  2804
#### 1-D tables
tab.a <- apply(tab.ars, 1, sum); tab.a
##  <=14 15-19 20-24 25-29 30-34 35-44 45-54   >55 
##   230  4378 10405  9610  8648  6901  2631  1278
tab.r <- apply(tab.ars, 2, sum); tab.r
## Black Other White 
## 35508  3956  4617
tab.s <- apply(tab.ars, 3, sum); tab.s
## Female   Male 
##  18075  26006

#Exercise 3.4

tab.ars <- xtabs(~ Age + Race + Sex, data = std89c)
rowt <- apply(tab.ars, c(1, 3), sum) # row distrib
rowd <- sweep(tab.ars, c(1, 3), rowt, "/"); rowd
## , , Sex = Female
## 
##        Race
## Age          Black      Other      White
##   <=14  0.86842105 0.05789474 0.07368421
##   15-19 0.84595202 0.05922039 0.09482759
##   20-24 0.85203406 0.05808893 0.08987701
##   25-29 0.83372039 0.06572225 0.10055736
##   30-34 0.84474446 0.05368049 0.10157506
##   35-44 0.79335793 0.07854507 0.12809700
##   45-54 0.80492813 0.08213552 0.11293634
##   >55   0.70229008 0.11450382 0.18320611
## 
## , , Sex = Male
## 
##        Race
## Age          Black      Other      White
##   <=14  0.77500000 0.17500000 0.05000000
##   15-19 0.82573099 0.12280702 0.05146199
##   20-24 0.79277344 0.12773438 0.07949219
##   25-29 0.77696078 0.11934389 0.10369532
##   30-34 0.80422612 0.09391367 0.10186021
##   35-44 0.77098321 0.09832134 0.13069544
##   45-54 0.75513060 0.09421642 0.15065299
##   >55   0.71752398 0.09415867 0.18831735
apply(rowd, c(1, 3), sum) # confirm
##        Sex
## Age     Female Male
##   <=14       1    1
##   15-19      1    1
##   20-24      1    1
##   25-29      1    1
##   30-34      1    1
##   35-44      1    1
##   45-54      1    1
##   >55        1    1
colt <- apply(tab.ars, c(2, 3), sum) #  col distrib
cold <- sweep(tab.ars, c(2, 3), colt, "/"); cold
## , , Sex = Female
## 
##        Race
## Age            Black        Other        White
##   <=14  0.0109040444 0.0097345133 0.0077220077
##   15-19 0.1491541105 0.1398230088 0.1395477110
##   20-24 0.2975812847 0.2716814159 0.2619966906
##   25-29 0.2372455723 0.2504424779 0.2388306674
##   30-34 0.1736716891 0.1477876106 0.1742967457
##   35-44 0.0994581020 0.1318584071 0.1340319912
##   45-54 0.0259053661 0.0353982301 0.0303364589
##   >55   0.0060798308 0.0132743363 0.0132377275
## 
## , , Sex = Male
## 
##        Race
## Age            Black        Other        White
##   <=14  0.0015213977 0.0024769993 0.0007132668
##   15-19 0.0692972124 0.0743099788 0.0313837375
##   20-24 0.1992049470 0.2314225053 0.1451497860
##   25-29 0.2022477424 0.2239915074 0.1961483595
##   30-34 0.2185414213 0.1840056617 0.2011412268
##   35-44 0.1893404005 0.1740976645 0.2332382311
##   45-54 0.0794562230 0.0714791224 0.1151925820
##   >55   0.0403906557 0.0382165605 0.0770328103
apply(cold, c(2, 3), sum) # confirm
##        Sex
## Race    Female Male
##   Black      1    1
##   Other      1    1
##   White      1    1
jtt <- apply(tab.ars, 3, sum) # joint distrib
jtd <- sweep(tab.ars, 3, jtt, "/"); jtd
## , , Sex = Female
## 
##        Race
## Age            Black        Other        White
##   <=14  9.128631e-03 6.085754e-04 7.745505e-04
##   15-19 1.248686e-01 8.741355e-03 1.399723e-02
##   20-24 2.491286e-01 1.698479e-02 2.627939e-02
##   25-29 1.986169e-01 1.565698e-02 2.395574e-02
##   30-34 1.453942e-01 9.239281e-03 1.748271e-02
##   35-44 8.326418e-02 8.243430e-03 1.344398e-02
##   45-54 2.168741e-02 2.213001e-03 3.042877e-03
##   >55   5.089903e-03 8.298755e-04 1.327801e-03
## 
## , , Sex = Male
## 
##        Race
## Age            Black        Other        White
##   <=14  1.192033e-03 2.691687e-04 7.690533e-05
##   15-19 5.429516e-02 8.075060e-03 3.383834e-03
##   20-24 1.560794e-01 2.514804e-02 1.565023e-02
##   25-29 1.584634e-01 2.434054e-02 2.114897e-02
##   30-34 1.712297e-01 1.999539e-02 2.168730e-02
##   35-44 1.483504e-01 1.891871e-02 2.514804e-02
##   45-54 6.225486e-02 7.767438e-03 1.242021e-02
##   >55   3.164654e-02 4.152888e-03 8.305776e-03
apply(jtd, 3, sum) # confirm
## Female   Male 
##      1      1
distr <- list(rowd, cold, jtd); distr 
## [[1]]
## , , Sex = Female
## 
##        Race
## Age          Black      Other      White
##   <=14  0.86842105 0.05789474 0.07368421
##   15-19 0.84595202 0.05922039 0.09482759
##   20-24 0.85203406 0.05808893 0.08987701
##   25-29 0.83372039 0.06572225 0.10055736
##   30-34 0.84474446 0.05368049 0.10157506
##   35-44 0.79335793 0.07854507 0.12809700
##   45-54 0.80492813 0.08213552 0.11293634
##   >55   0.70229008 0.11450382 0.18320611
## 
## , , Sex = Male
## 
##        Race
## Age          Black      Other      White
##   <=14  0.77500000 0.17500000 0.05000000
##   15-19 0.82573099 0.12280702 0.05146199
##   20-24 0.79277344 0.12773438 0.07949219
##   25-29 0.77696078 0.11934389 0.10369532
##   30-34 0.80422612 0.09391367 0.10186021
##   35-44 0.77098321 0.09832134 0.13069544
##   45-54 0.75513060 0.09421642 0.15065299
##   >55   0.71752398 0.09415867 0.18831735
## 
## 
## [[2]]
## , , Sex = Female
## 
##        Race
## Age            Black        Other        White
##   <=14  0.0109040444 0.0097345133 0.0077220077
##   15-19 0.1491541105 0.1398230088 0.1395477110
##   20-24 0.2975812847 0.2716814159 0.2619966906
##   25-29 0.2372455723 0.2504424779 0.2388306674
##   30-34 0.1736716891 0.1477876106 0.1742967457
##   35-44 0.0994581020 0.1318584071 0.1340319912
##   45-54 0.0259053661 0.0353982301 0.0303364589
##   >55   0.0060798308 0.0132743363 0.0132377275
## 
## , , Sex = Male
## 
##        Race
## Age            Black        Other        White
##   <=14  0.0015213977 0.0024769993 0.0007132668
##   15-19 0.0692972124 0.0743099788 0.0313837375
##   20-24 0.1992049470 0.2314225053 0.1451497860
##   25-29 0.2022477424 0.2239915074 0.1961483595
##   30-34 0.2185414213 0.1840056617 0.2011412268
##   35-44 0.1893404005 0.1740976645 0.2332382311
##   45-54 0.0794562230 0.0714791224 0.1151925820
##   >55   0.0403906557 0.0382165605 0.0770328103
## 
## 
## [[3]]
## , , Sex = Female
## 
##        Race
## Age            Black        Other        White
##   <=14  9.128631e-03 6.085754e-04 7.745505e-04
##   15-19 1.248686e-01 8.741355e-03 1.399723e-02
##   20-24 2.491286e-01 1.698479e-02 2.627939e-02
##   25-29 1.986169e-01 1.565698e-02 2.395574e-02
##   30-34 1.453942e-01 9.239281e-03 1.748271e-02
##   35-44 8.326418e-02 8.243430e-03 1.344398e-02
##   45-54 2.168741e-02 2.213001e-03 3.042877e-03
##   >55   5.089903e-03 8.298755e-04 1.327801e-03
## 
## , , Sex = Male
## 
##        Race
## Age            Black        Other        White
##   <=14  1.192033e-03 2.691687e-04 7.690533e-05
##   15-19 5.429516e-02 8.075060e-03 3.383834e-03
##   20-24 1.560794e-01 2.514804e-02 1.565023e-02
##   25-29 1.584634e-01 2.434054e-02 2.114897e-02
##   30-34 1.712297e-01 1.999539e-02 2.168730e-02
##   35-44 1.483504e-01 1.891871e-02 2.514804e-02
##   45-54 6.225486e-02 7.767438e-03 1.242021e-02
##   >55   3.164654e-02 4.152888e-03 8.305776e-03

#Exercise 3.5

table(std89c)
## , , Age = <=14
## 
##         Race
## Sex      Black Other White
##   Female   165    11    14
##   Male      31     7     2
## 
## , , Age = 15-19
## 
##         Race
## Sex      Black Other White
##   Female  2257   158   253
##   Male    1412   210    88
## 
## , , Age = 20-24
## 
##         Race
## Sex      Black Other White
##   Female  4503   307   475
##   Male    4059   654   407
## 
## , , Age = 25-29
## 
##         Race
## Sex      Black Other White
##   Female  3590   283   433
##   Male    4121   633   550
## 
## , , Age = 30-34
## 
##         Race
## Sex      Black Other White
##   Female  2628   167   316
##   Male    4453   520   564
## 
## , , Age = 35-44
## 
##         Race
## Sex      Black Other White
##   Female  1505   149   243
##   Male    3858   492   654
## 
## , , Age = 45-54
## 
##         Race
## Sex      Black Other White
##   Female   392    40    55
##   Male    1619   202   323
## 
## , , Age = >55
## 
##         Race
## Sex      Black Other White
##   Female    92    15    24
##   Male     823   108   216
data.frame(table(std89c))
##       Sex  Race   Age Freq
## 1  Female Black  <=14  165
## 2    Male Black  <=14   31
## 3  Female Other  <=14   11
## 4    Male Other  <=14    7
## 5  Female White  <=14   14
## 6    Male White  <=14    2
## 7  Female Black 15-19 2257
## 8    Male Black 15-19 1412
## 9  Female Other 15-19  158
## 10   Male Other 15-19  210
## 11 Female White 15-19  253
## 12   Male White 15-19   88
## 13 Female Black 20-24 4503
## 14   Male Black 20-24 4059
## 15 Female Other 20-24  307
## 16   Male Other 20-24  654
## 17 Female White 20-24  475
## 18   Male White 20-24  407
## 19 Female Black 25-29 3590
## 20   Male Black 25-29 4121
## 21 Female Other 25-29  283
## 22   Male Other 25-29  633
## 23 Female White 25-29  433
## 24   Male White 25-29  550
## 25 Female Black 30-34 2628
## 26   Male Black 30-34 4453
## 27 Female Other 30-34  167
## 28   Male Other 30-34  520
## 29 Female White 30-34  316
## 30   Male White 30-34  564
## 31 Female Black 35-44 1505
## 32   Male Black 35-44 3858
## 33 Female Other 35-44  149
## 34   Male Other 35-44  492
## 35 Female White 35-44  243
## 36   Male White 35-44  654
## 37 Female Black 45-54  392
## 38   Male Black 45-54 1619
## 39 Female Other 45-54   40
## 40   Male Other 45-54  202
## 41 Female White 45-54   55
## 42   Male White 45-54  323
## 43 Female Black   >55   92
## 44   Male Black   >55  823
## 45 Female Other   >55   15
## 46   Male Other   >55  108
## 47 Female White   >55   24
## 48   Male White   >55  216

#Exercise 3.6

rep(c(1,2,3), c(4,5,6))
##  [1] 1 1 1 1 2 2 2 2 2 3 3 3 3 3 3

#Exercise 3.7

std89b <- 
read.csv("https://raw.githubusercontent.com/taragonmd/data/master/syphilis89b.csv",
as.is = c(FALSE, FALSE, TRUE, FALSE))
str(std89b)
## 'data.frame':    48 obs. of  4 variables:
##  $ Sex : Factor w/ 2 levels "Female","Male": 2 1 2 1 2 1 2 1 2 1 ...
##  $ Race: Factor w/ 3 levels "Black","Other",..: 3 3 1 1 2 2 3 3 1 1 ...
##  $ Age : chr  "<=14" "<=14" "<=14" "<=14" ...
##  $ Freq: int  2 14 31 165 7 11 88 253 1412 2257 ...
std89b
##       Sex  Race   Age Freq
## 1    Male White  <=14    2
## 2  Female White  <=14   14
## 3    Male Black  <=14   31
## 4  Female Black  <=14  165
## 5    Male Other  <=14    7
## 6  Female Other  <=14   11
## 7    Male White 15-19   88
## 8  Female White 15-19  253
## 9    Male Black 15-19 1412
## 10 Female Black 15-19 2257
## 11   Male Other 15-19  210
## 12 Female Other 15-19  158
## 13   Male White 20-24  407
## 14 Female White 20-24  475
## 15   Male Black 20-24 4059
## 16 Female Black 20-24 4503
## 17   Male Other 20-24  654
## 18 Female Other 20-24  307
## 19   Male White 25-29  550
## 20 Female White 25-29  433
## 21   Male Black 25-29 4121
## 22 Female Black 25-29 3590
## 23   Male Other 25-29  633
## 24 Female Other 25-29  283
## 25   Male White 30-34  564
## 26 Female White 30-34  316
## 27   Male Black 30-34 4453
## 28 Female Black 30-34 2628
## 29   Male Other 30-34  520
## 30 Female Other 30-34  167
## 31   Male White 35-44  654
## 32 Female White 35-44  243
## 33   Male Black 35-44 3858
## 34 Female Black 35-44 1505
## 35   Male Other 35-44  492
## 36 Female Other 35-44  149
## 37   Male White 45-54  323
## 38 Female White 45-54   55
## 39   Male Black 45-54 1619
## 40 Female Black 45-54  392
## 41   Male Other 45-54  202
## 42 Female Other 45-54   40
## 43   Male White   >55  216
## 44 Female White   >55   24
## 45   Male Black   >55  823
## 46 Female Black   >55   92
## 47   Male Other   >55  108
## 48 Female Other   >55   15
expand.rows <- rep(1:nrow(std89b),std89b$Freq)
std89b.df <- std89b[expand.rows,]
agelab<-c("<=14","15-19","20-24","25-29","30-34","35-44","45-54",">55")
std89b.df$Age <- factor(std89b.df$Age, levels = agelab)
str(std89b.df)
## 'data.frame':    44081 obs. of  4 variables:
##  $ Sex : Factor w/ 2 levels "Female","Male": 2 2 1 1 1 1 1 1 1 1 ...
##  $ Race: Factor w/ 3 levels "Black","Other",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ Age : Factor w/ 8 levels "<=14","15-19",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Freq: int  2 2 14 14 14 14 14 14 14 14 ...
table(std89b.df$Age)
## 
##  <=14 15-19 20-24 25-29 30-34 35-44 45-54   >55 
##   230  4378 10405  9610  8648  6901  2631  1278