jp2 <- read.csv(‘https://raw.githubusercontent.com/taragonmd/data/master/drugrx-pearl2.csv’) str(jp2)
#Exercise 3.1
pR1.D0 <- (234+55)/(270+80)
pR1.D1 <- (81+192)/(87+263)
pR1.D0Gm <- 234/270
pR1.D1Gm <- 81/87
pR1.D0Gw <- 55/80
pR1.D1Gw <- 192/263
jp2 <-
read.csv('https://raw.githubusercontent.com/taragonmd/data/master/drugrx-pearl2.csv')
str(jp2)
## 'data.frame': 700 obs. of 4 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Recovered: Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ Drug : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ Gender : Factor w/ 2 levels "Men","Women": 1 1 1 1 1 1 1 1 1 1 ...
tab2.rdg <- xtabs(~ Recovered + Drug + Gender, data = jp2)
tab2.rd <- apply(tab2.rdg, c(1,2), sum)
pR1.D0 <- tab2.rd['Yes','No']/sum(tab2.rd[,'No'])
pR1.D1 <- tab2.rd['Yes','Yes']/sum(tab2.rd[,'Yes'])
pR1.D0Gm <- tab2.rdg['Yes','No','Men']/sum(tab2.rdg[,'No','Men'])
pR1.D1Gm <- tab2.rdg['Yes','Yes','Men']/sum(tab2.rdg[,'Yes','Men'])
pR1.D0Gw <- tab2.rdg['Yes','No','Women']/sum(tab2.rdg[,'No','Women'])
pR1.D1Gw <- tab2.rdg['Yes','Yes','Women']/sum(tab2.rdg[,'Yes','Women'])
prop.table(tab2.rd, 2)
## Drug
## Recovered No Yes
## No 0.1742857 0.22
## Yes 0.8257143 0.78
prop.table(tab2.rdg, 2:3)
## , , Gender = Men
##
## Drug
## Recovered No Yes
## No 0.13333333 0.06896552
## Yes 0.86666667 0.93103448
##
## , , Gender = Women
##
## Drug
## Recovered No Yes
## No 0.31250000 0.26996198
## Yes 0.68750000 0.73003802
#Exercise 3.2
#Ex. 3.2a
std89c <-
read.csv("https://raw.githubusercontent.com/taragonmd/data/master/syphilis89c.csv")
str(std89c)
## 'data.frame': 44081 obs. of 3 variables:
## $ Sex : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ...
## $ Race: Factor w/ 3 levels "Black","Other",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ Age : Factor w/ 8 levels "<=14",">55","15-19",..: 1 1 3 3 3 3 3 3 3 3 ...
head(std89c)
## Sex Race Age
## 1 Male White <=14
## 2 Male White <=14
## 3 Male White 15-19
## 4 Male White 15-19
## 5 Male White 15-19
## 6 Male White 15-19
lapply(std89c, table)
## $Sex
##
## Female Male
## 18075 26006
##
## $Race
##
## Black Other White
## 35508 3956 4617
##
## $Age
##
## <=14 >55 15-19 20-24 25-29 30-34 35-44 45-54
## 230 1278 4378 10405 9610 8648 6901 2631
#Ex 3.2b
std89c<- read.csv(“https://raw.githubusercontent.com/taragonmd/data/master/syphilis89c.csv”, as.is = c(FALSE, FALSE, TRUE))
str(std89c)
## 'data.frame': 44081 obs. of 3 variables:
## $ Sex : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ...
## $ Race: Factor w/ 3 levels "Black","Other",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ Age : Factor w/ 8 levels "<=14",">55","15-19",..: 1 1 3 3 3 3 3 3 3 3 ...
table(std89c$Age)
##
## <=14 >55 15-19 20-24 25-29 30-34 35-44 45-54
## 230 1278 4378 10405 9610 8648 6901 2631
agelab<-c("<=14","15-19","20-24","25-29","30-34","35-44","45-54",">55")
std89c$Age <- factor(std89c$Age, levels = agelab)
table(std89c$Age)
##
## <=14 15-19 20-24 25-29 30-34 35-44 45-54 >55
## 230 4378 10405 9610 8648 6901 2631 1278
#Ex 3.2c
table(std89c$Race, std89c$Age, std89c$Sex)
## , , = Female
##
##
## <=14 15-19 20-24 25-29 30-34 35-44 45-54 >55
## Black 165 2257 4503 3590 2628 1505 392 92
## Other 11 158 307 283 167 149 40 15
## White 14 253 475 433 316 243 55 24
##
## , , = Male
##
##
## <=14 15-19 20-24 25-29 30-34 35-44 45-54 >55
## Black 31 1412 4059 4121 4453 3858 1619 823
## Other 7 210 654 633 520 492 202 108
## White 2 88 407 550 564 654 323 216
xtabs(~ Race + Age + Sex, data = std89c)
## , , Sex = Female
##
## Age
## Race <=14 15-19 20-24 25-29 30-34 35-44 45-54 >55
## Black 165 2257 4503 3590 2628 1505 392 92
## Other 11 158 307 283 167 149 40 15
## White 14 253 475 433 316 243 55 24
##
## , , Sex = Male
##
## Age
## Race <=14 15-19 20-24 25-29 30-34 35-44 45-54 >55
## Black 31 1412 4059 4121 4453 3858 1619 823
## Other 7 210 654 633 520 492 202 108
## White 2 88 407 550 564 654 323 216
attach(std89c)
table(Race, Age, Sex)
## , , Sex = Female
##
## Age
## Race <=14 15-19 20-24 25-29 30-34 35-44 45-54 >55
## Black 165 2257 4503 3590 2628 1505 392 92
## Other 11 158 307 283 167 149 40 15
## White 14 253 475 433 316 243 55 24
##
## , , Sex = Male
##
## Age
## Race <=14 15-19 20-24 25-29 30-34 35-44 45-54 >55
## Black 31 1412 4059 4121 4453 3858 1619 823
## Other 7 210 654 633 520 492 202 108
## White 2 88 407 550 564 654 323 216
xtabs(~ Race + Age + Sex)
## , , Sex = Female
##
## Age
## Race <=14 15-19 20-24 25-29 30-34 35-44 45-54 >55
## Black 165 2257 4503 3590 2628 1505 392 92
## Other 11 158 307 283 167 149 40 15
## White 14 253 475 433 316 243 55 24
##
## , , Sex = Male
##
## Age
## Race <=14 15-19 20-24 25-29 30-34 35-44 45-54 >55
## Black 31 1412 4059 4121 4453 3858 1619 823
## Other 7 210 654 633 520 492 202 108
## White 2 88 407 550 564 654 323 216
#Exercise 3.3
tab.ars <- xtabs(~ Age + Race + Sex, data = std89c)
# 2-D tables
tab.ar <- apply(tab.ars, c(1, 2), sum); tab.ar
## Race
## Age Black Other White
## <=14 196 18 16
## 15-19 3669 368 341
## 20-24 8562 961 882
## 25-29 7711 916 983
## 30-34 7081 687 880
## 35-44 5363 641 897
## 45-54 2011 242 378
## >55 915 123 240
tab.as <- apply(tab.ars, c(1, 3), sum); tab.as
## Sex
## Age Female Male
## <=14 190 40
## 15-19 2668 1710
## 20-24 5285 5120
## 25-29 4306 5304
## 30-34 3111 5537
## 35-44 1897 5004
## 45-54 487 2144
## >55 131 1147
tab.rs <- apply(tab.ars, c(2, 3), sum); tab.rs
## Sex
## Race Female Male
## Black 15132 20376
## Other 1130 2826
## White 1813 2804
#### 1-D tables
tab.a <- apply(tab.ars, 1, sum); tab.a
## <=14 15-19 20-24 25-29 30-34 35-44 45-54 >55
## 230 4378 10405 9610 8648 6901 2631 1278
tab.r <- apply(tab.ars, 2, sum); tab.r
## Black Other White
## 35508 3956 4617
tab.s <- apply(tab.ars, 3, sum); tab.s
## Female Male
## 18075 26006
#Exercise 3.4
tab.ars <- xtabs(~ Age + Race + Sex, data = std89c)
rowt <- apply(tab.ars, c(1, 3), sum) # row distrib
rowd <- sweep(tab.ars, c(1, 3), rowt, "/"); rowd
## , , Sex = Female
##
## Race
## Age Black Other White
## <=14 0.86842105 0.05789474 0.07368421
## 15-19 0.84595202 0.05922039 0.09482759
## 20-24 0.85203406 0.05808893 0.08987701
## 25-29 0.83372039 0.06572225 0.10055736
## 30-34 0.84474446 0.05368049 0.10157506
## 35-44 0.79335793 0.07854507 0.12809700
## 45-54 0.80492813 0.08213552 0.11293634
## >55 0.70229008 0.11450382 0.18320611
##
## , , Sex = Male
##
## Race
## Age Black Other White
## <=14 0.77500000 0.17500000 0.05000000
## 15-19 0.82573099 0.12280702 0.05146199
## 20-24 0.79277344 0.12773438 0.07949219
## 25-29 0.77696078 0.11934389 0.10369532
## 30-34 0.80422612 0.09391367 0.10186021
## 35-44 0.77098321 0.09832134 0.13069544
## 45-54 0.75513060 0.09421642 0.15065299
## >55 0.71752398 0.09415867 0.18831735
apply(rowd, c(1, 3), sum) # confirm
## Sex
## Age Female Male
## <=14 1 1
## 15-19 1 1
## 20-24 1 1
## 25-29 1 1
## 30-34 1 1
## 35-44 1 1
## 45-54 1 1
## >55 1 1
colt <- apply(tab.ars, c(2, 3), sum) # col distrib
cold <- sweep(tab.ars, c(2, 3), colt, "/"); cold
## , , Sex = Female
##
## Race
## Age Black Other White
## <=14 0.0109040444 0.0097345133 0.0077220077
## 15-19 0.1491541105 0.1398230088 0.1395477110
## 20-24 0.2975812847 0.2716814159 0.2619966906
## 25-29 0.2372455723 0.2504424779 0.2388306674
## 30-34 0.1736716891 0.1477876106 0.1742967457
## 35-44 0.0994581020 0.1318584071 0.1340319912
## 45-54 0.0259053661 0.0353982301 0.0303364589
## >55 0.0060798308 0.0132743363 0.0132377275
##
## , , Sex = Male
##
## Race
## Age Black Other White
## <=14 0.0015213977 0.0024769993 0.0007132668
## 15-19 0.0692972124 0.0743099788 0.0313837375
## 20-24 0.1992049470 0.2314225053 0.1451497860
## 25-29 0.2022477424 0.2239915074 0.1961483595
## 30-34 0.2185414213 0.1840056617 0.2011412268
## 35-44 0.1893404005 0.1740976645 0.2332382311
## 45-54 0.0794562230 0.0714791224 0.1151925820
## >55 0.0403906557 0.0382165605 0.0770328103
apply(cold, c(2, 3), sum) # confirm
## Sex
## Race Female Male
## Black 1 1
## Other 1 1
## White 1 1
jtt <- apply(tab.ars, 3, sum) # joint distrib
jtd <- sweep(tab.ars, 3, jtt, "/"); jtd
## , , Sex = Female
##
## Race
## Age Black Other White
## <=14 9.128631e-03 6.085754e-04 7.745505e-04
## 15-19 1.248686e-01 8.741355e-03 1.399723e-02
## 20-24 2.491286e-01 1.698479e-02 2.627939e-02
## 25-29 1.986169e-01 1.565698e-02 2.395574e-02
## 30-34 1.453942e-01 9.239281e-03 1.748271e-02
## 35-44 8.326418e-02 8.243430e-03 1.344398e-02
## 45-54 2.168741e-02 2.213001e-03 3.042877e-03
## >55 5.089903e-03 8.298755e-04 1.327801e-03
##
## , , Sex = Male
##
## Race
## Age Black Other White
## <=14 1.192033e-03 2.691687e-04 7.690533e-05
## 15-19 5.429516e-02 8.075060e-03 3.383834e-03
## 20-24 1.560794e-01 2.514804e-02 1.565023e-02
## 25-29 1.584634e-01 2.434054e-02 2.114897e-02
## 30-34 1.712297e-01 1.999539e-02 2.168730e-02
## 35-44 1.483504e-01 1.891871e-02 2.514804e-02
## 45-54 6.225486e-02 7.767438e-03 1.242021e-02
## >55 3.164654e-02 4.152888e-03 8.305776e-03
apply(jtd, 3, sum) # confirm
## Female Male
## 1 1
distr <- list(rowd, cold, jtd); distr
## [[1]]
## , , Sex = Female
##
## Race
## Age Black Other White
## <=14 0.86842105 0.05789474 0.07368421
## 15-19 0.84595202 0.05922039 0.09482759
## 20-24 0.85203406 0.05808893 0.08987701
## 25-29 0.83372039 0.06572225 0.10055736
## 30-34 0.84474446 0.05368049 0.10157506
## 35-44 0.79335793 0.07854507 0.12809700
## 45-54 0.80492813 0.08213552 0.11293634
## >55 0.70229008 0.11450382 0.18320611
##
## , , Sex = Male
##
## Race
## Age Black Other White
## <=14 0.77500000 0.17500000 0.05000000
## 15-19 0.82573099 0.12280702 0.05146199
## 20-24 0.79277344 0.12773438 0.07949219
## 25-29 0.77696078 0.11934389 0.10369532
## 30-34 0.80422612 0.09391367 0.10186021
## 35-44 0.77098321 0.09832134 0.13069544
## 45-54 0.75513060 0.09421642 0.15065299
## >55 0.71752398 0.09415867 0.18831735
##
##
## [[2]]
## , , Sex = Female
##
## Race
## Age Black Other White
## <=14 0.0109040444 0.0097345133 0.0077220077
## 15-19 0.1491541105 0.1398230088 0.1395477110
## 20-24 0.2975812847 0.2716814159 0.2619966906
## 25-29 0.2372455723 0.2504424779 0.2388306674
## 30-34 0.1736716891 0.1477876106 0.1742967457
## 35-44 0.0994581020 0.1318584071 0.1340319912
## 45-54 0.0259053661 0.0353982301 0.0303364589
## >55 0.0060798308 0.0132743363 0.0132377275
##
## , , Sex = Male
##
## Race
## Age Black Other White
## <=14 0.0015213977 0.0024769993 0.0007132668
## 15-19 0.0692972124 0.0743099788 0.0313837375
## 20-24 0.1992049470 0.2314225053 0.1451497860
## 25-29 0.2022477424 0.2239915074 0.1961483595
## 30-34 0.2185414213 0.1840056617 0.2011412268
## 35-44 0.1893404005 0.1740976645 0.2332382311
## 45-54 0.0794562230 0.0714791224 0.1151925820
## >55 0.0403906557 0.0382165605 0.0770328103
##
##
## [[3]]
## , , Sex = Female
##
## Race
## Age Black Other White
## <=14 9.128631e-03 6.085754e-04 7.745505e-04
## 15-19 1.248686e-01 8.741355e-03 1.399723e-02
## 20-24 2.491286e-01 1.698479e-02 2.627939e-02
## 25-29 1.986169e-01 1.565698e-02 2.395574e-02
## 30-34 1.453942e-01 9.239281e-03 1.748271e-02
## 35-44 8.326418e-02 8.243430e-03 1.344398e-02
## 45-54 2.168741e-02 2.213001e-03 3.042877e-03
## >55 5.089903e-03 8.298755e-04 1.327801e-03
##
## , , Sex = Male
##
## Race
## Age Black Other White
## <=14 1.192033e-03 2.691687e-04 7.690533e-05
## 15-19 5.429516e-02 8.075060e-03 3.383834e-03
## 20-24 1.560794e-01 2.514804e-02 1.565023e-02
## 25-29 1.584634e-01 2.434054e-02 2.114897e-02
## 30-34 1.712297e-01 1.999539e-02 2.168730e-02
## 35-44 1.483504e-01 1.891871e-02 2.514804e-02
## 45-54 6.225486e-02 7.767438e-03 1.242021e-02
## >55 3.164654e-02 4.152888e-03 8.305776e-03
#Exercise 3.5
table(std89c)
## , , Age = <=14
##
## Race
## Sex Black Other White
## Female 165 11 14
## Male 31 7 2
##
## , , Age = 15-19
##
## Race
## Sex Black Other White
## Female 2257 158 253
## Male 1412 210 88
##
## , , Age = 20-24
##
## Race
## Sex Black Other White
## Female 4503 307 475
## Male 4059 654 407
##
## , , Age = 25-29
##
## Race
## Sex Black Other White
## Female 3590 283 433
## Male 4121 633 550
##
## , , Age = 30-34
##
## Race
## Sex Black Other White
## Female 2628 167 316
## Male 4453 520 564
##
## , , Age = 35-44
##
## Race
## Sex Black Other White
## Female 1505 149 243
## Male 3858 492 654
##
## , , Age = 45-54
##
## Race
## Sex Black Other White
## Female 392 40 55
## Male 1619 202 323
##
## , , Age = >55
##
## Race
## Sex Black Other White
## Female 92 15 24
## Male 823 108 216
data.frame(table(std89c))
## Sex Race Age Freq
## 1 Female Black <=14 165
## 2 Male Black <=14 31
## 3 Female Other <=14 11
## 4 Male Other <=14 7
## 5 Female White <=14 14
## 6 Male White <=14 2
## 7 Female Black 15-19 2257
## 8 Male Black 15-19 1412
## 9 Female Other 15-19 158
## 10 Male Other 15-19 210
## 11 Female White 15-19 253
## 12 Male White 15-19 88
## 13 Female Black 20-24 4503
## 14 Male Black 20-24 4059
## 15 Female Other 20-24 307
## 16 Male Other 20-24 654
## 17 Female White 20-24 475
## 18 Male White 20-24 407
## 19 Female Black 25-29 3590
## 20 Male Black 25-29 4121
## 21 Female Other 25-29 283
## 22 Male Other 25-29 633
## 23 Female White 25-29 433
## 24 Male White 25-29 550
## 25 Female Black 30-34 2628
## 26 Male Black 30-34 4453
## 27 Female Other 30-34 167
## 28 Male Other 30-34 520
## 29 Female White 30-34 316
## 30 Male White 30-34 564
## 31 Female Black 35-44 1505
## 32 Male Black 35-44 3858
## 33 Female Other 35-44 149
## 34 Male Other 35-44 492
## 35 Female White 35-44 243
## 36 Male White 35-44 654
## 37 Female Black 45-54 392
## 38 Male Black 45-54 1619
## 39 Female Other 45-54 40
## 40 Male Other 45-54 202
## 41 Female White 45-54 55
## 42 Male White 45-54 323
## 43 Female Black >55 92
## 44 Male Black >55 823
## 45 Female Other >55 15
## 46 Male Other >55 108
## 47 Female White >55 24
## 48 Male White >55 216
#Exercise 3.6
rep(c(1,2,3), c(4,5,6))
## [1] 1 1 1 1 2 2 2 2 2 3 3 3 3 3 3
#Exercise 3.7
std89b <-
read.csv("https://raw.githubusercontent.com/taragonmd/data/master/syphilis89b.csv",
as.is = c(FALSE, FALSE, TRUE, FALSE))
str(std89b)
## 'data.frame': 48 obs. of 4 variables:
## $ Sex : Factor w/ 2 levels "Female","Male": 2 1 2 1 2 1 2 1 2 1 ...
## $ Race: Factor w/ 3 levels "Black","Other",..: 3 3 1 1 2 2 3 3 1 1 ...
## $ Age : chr "<=14" "<=14" "<=14" "<=14" ...
## $ Freq: int 2 14 31 165 7 11 88 253 1412 2257 ...
std89b
## Sex Race Age Freq
## 1 Male White <=14 2
## 2 Female White <=14 14
## 3 Male Black <=14 31
## 4 Female Black <=14 165
## 5 Male Other <=14 7
## 6 Female Other <=14 11
## 7 Male White 15-19 88
## 8 Female White 15-19 253
## 9 Male Black 15-19 1412
## 10 Female Black 15-19 2257
## 11 Male Other 15-19 210
## 12 Female Other 15-19 158
## 13 Male White 20-24 407
## 14 Female White 20-24 475
## 15 Male Black 20-24 4059
## 16 Female Black 20-24 4503
## 17 Male Other 20-24 654
## 18 Female Other 20-24 307
## 19 Male White 25-29 550
## 20 Female White 25-29 433
## 21 Male Black 25-29 4121
## 22 Female Black 25-29 3590
## 23 Male Other 25-29 633
## 24 Female Other 25-29 283
## 25 Male White 30-34 564
## 26 Female White 30-34 316
## 27 Male Black 30-34 4453
## 28 Female Black 30-34 2628
## 29 Male Other 30-34 520
## 30 Female Other 30-34 167
## 31 Male White 35-44 654
## 32 Female White 35-44 243
## 33 Male Black 35-44 3858
## 34 Female Black 35-44 1505
## 35 Male Other 35-44 492
## 36 Female Other 35-44 149
## 37 Male White 45-54 323
## 38 Female White 45-54 55
## 39 Male Black 45-54 1619
## 40 Female Black 45-54 392
## 41 Male Other 45-54 202
## 42 Female Other 45-54 40
## 43 Male White >55 216
## 44 Female White >55 24
## 45 Male Black >55 823
## 46 Female Black >55 92
## 47 Male Other >55 108
## 48 Female Other >55 15
expand.rows <- rep(1:nrow(std89b),std89b$Freq)
std89b.df <- std89b[expand.rows,]
agelab<-c("<=14","15-19","20-24","25-29","30-34","35-44","45-54",">55")
std89b.df$Age <- factor(std89b.df$Age, levels = agelab)
str(std89b.df)
## 'data.frame': 44081 obs. of 4 variables:
## $ Sex : Factor w/ 2 levels "Female","Male": 2 2 1 1 1 1 1 1 1 1 ...
## $ Race: Factor w/ 3 levels "Black","Other",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ Age : Factor w/ 8 levels "<=14","15-19",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Freq: int 2 2 14 14 14 14 14 14 14 14 ...
table(std89b.df$Age)
##
## <=14 15-19 20-24 25-29 30-34 35-44 45-54 >55
## 230 4378 10405 9610 8648 6901 2631 1278