## Question 2
# Build `dat`: one row per month, with numeric columns number1991 and
# number1992. The pasted text holds one value per line; each month name
# appears twice, followed first by its 1991 number and then by its 1992 number.
# local() keeps the intermediate objects out of the global environment.
dat <- local({
  raw <- read.table(text = "January
325
January
317
February
312
February
302
March
346
March
339
April
340
April
328
May
355
May
361
June
342
June
333
July
358
July
341
August
346
August
343
September
365
September
359
October
355
October
305
November
324
November
312
December
342
December
321", stringsAsFactors = FALSE)
  # Fold the single 48-value column into 12 rows laid out as
  # (month, 1991 number, month, 1992 number).
  cells <- matrix(raw[[1]], ncol = 4, byrow = TRUE)
  # Keep only the two number columns, converted to numeric, and use the month
  # name as the row label.
  data.frame(
    number1991 = as.numeric(cells[, 2]),
    number1992 = as.numeric(cells[, 4]),
    row.names = cells[, 1]
  )
})
dat
number1991 number1992
January 325 317
February 312 302
March 346 339
April 340 328
May 355 361
June 342 333
July 358 341
August 346 343
September 365 359
October 355 305
November 324 312
December 342 321
# Express each month's number as a share of its year's column total
# (margin = 2 normalises within columns).
prop.table(data.matrix(dat), margin = 2)
number1991 number1992
January 0.07908 0.08003
February 0.07591 0.07624
March 0.08418 0.08558
April 0.08273 0.08281
May 0.08637 0.09114
June 0.08321 0.08407
July 0.08710 0.08609
August 0.08418 0.08659
September 0.08881 0.09063
October 0.08637 0.07700
November 0.07883 0.07877
December 0.08321 0.08104
# Read the Stata file into `fhs` with foreign's read.dta() and preview the
# first six rows.
library(foreign)
fhs <- read.dta(file = "./fhs.2d92301d751b.dta")
head(fhs, n = 6L)
randid death angina hospmi mi_fchd anychd stroke cvd hyperten timeap timemi timemifc timechd timestrk timecvd
1 2448 No No Yes Yes Yes No Yes No 24.000 17.626 17.626 17.626 24.000 17.626
2 6238 No No No No No No No No 24.000 24.000 24.000 24.000 24.000 24.000
3 9428 No No No No No No No No 24.000 24.000 24.000 24.000 24.000 24.000
4 10552 Yes No No No No Yes Yes Yes 8.093 8.093 8.093 8.093 5.719 5.719
5 11252 No No No No No No No Yes 24.000 24.000 24.000 24.000 24.000 24.000
6 11263 No No No Yes Yes No Yes Yes 24.000 24.000 15.658 15.658 24.000 15.658
timedth timehyp sex1 totchol1 age1 sysbp1 diabp1 cursmoke1 cigpday1 bmi1 diabetes1 bpmeds1 heartrte1 glucose1
1 24.000 24.00 Male 195 39 106.0 70 No 0 26.97 No No 80 77
2 24.000 24.00 Female 250 46 121.0 81 No 0 28.73 No No 95 76
3 24.000 24.00 Male 245 48 127.5 80 Yes 20 25.34 No No 75 70
4 8.093 0.00 Female 225 61 150.0 95 Yes 30 28.58 No No 65 103
5 24.000 11.73 Female 285 46 130.0 84 Yes 23 23.10 No No 85 85
6 24.000 0.00 Female 228 43 180.0 110 No 0 30.30 No No 77 99
prevchd1 prevap1 prevmi1 prevstrk1 prevhyp1 hdlc1 ldlc1 sex2 totchol2 age2 sysbp2 diabp2 cursmoke2 cigpday2
1 No No No No No NA NA <NA> NA NA NA NA <NA> NA
2 No No No No No NA NA Female 260 52 105 69.5 No 0
3 No No No No No NA NA Male 283 54 141 89.0 Yes 30
4 No No No No Yes NA NA Female 232 67 183 109.0 Yes 20
5 No No No No No NA NA Female 343 51 109 77.0 Yes 30
6 No No No No Yes NA NA Female 230 49 177 102.0 No 0
bmi2 diabetes2 bpmeds2 heartrte2 glucose2 prevchd2 prevap2 prevmi2 prevstrk2 prevhyp2 hdlc2 ldlc2 sex3
1 NA <NA> <NA> NA NA <NA> <NA> <NA> <NA> <NA> NA NA Male
2 29.43 No No 80 86 No No No No No NA NA Female
3 25.34 No No 75 87 No No No No No NA NA <NA>
4 30.18 No No 60 89 No No No No Yes NA NA <NA>
5 23.48 No No 90 72 No No No No No NA NA Female
6 31.36 No Yes 120 86 No No No No Yes NA NA Female
totchol3 age3 sysbp3 diabp3 cursmoke3 cigpday3 bmi3 diabetes3 bpmeds3 heartrte3 glucose3 prevchd3 prevap3
1 209 52 121 66 No 0 NA No No 69 92 No No
2 237 58 108 66 No 0 28.50 No No 80 71 No No
3 NA NA NA NA <NA> NA NA <NA> <NA> NA NA <NA> <NA>
4 NA NA NA NA <NA> NA NA <NA> <NA> NA NA <NA> <NA>
5 NA 58 155 90 Yes 30 24.61 No No 74 NA No No
6 220 55 180 106 No 0 31.17 Yes Yes 86 81 No No
prevmi3 prevstrk3 prevhyp3 hdlc3 ldlc3
1 No No No 31 178
2 No No No 54 141
3 <NA> <NA> <NA> NA NA
4 <NA> <NA> <NA> NA NA
5 No No Yes NA NA
6 No No Yes 46 135
## 2.
library(Deducer)
Note: On Mac OS X we strongly recommend using iplots from within JGR.
Proceed at your own risk as iplots cannot resolve potential ev.loop deadlocks.
'Yes' is assumed for all dialogs as they cannot be shown without a deadlock,
also ievent.wait() is disabled.
# Frequency tables for every column whose name contains "hyp" immediately
# followed by a digit.
frequencies(fhs[, grepl("hyp[0-9]", names(fhs))])
$prevhyp1
------------------------------------------------------------
-- Frequencies --
-- --
Value # of Cases % Cumulative %
1 No 3004 67.7 67.7
2 Yes 1430 32.3 100.0
-- --
-- Case Summary --
-- --
Valid Missing Total
# of cases 4434 0 4434
-- --
-- --
------------------------------------------------------------
$prevhyp2
------------------------------------------------------------
-- Frequencies --
-- --
Value # of Cases % Cumulative %
1 No 1971 50.2 50.2
2 Yes 1959 49.8 100.0
-- --
-- Case Summary --
-- --
Valid Missing Total
# of cases 3930 504 4434
-- --
-- --
------------------------------------------------------------
$prevhyp3
------------------------------------------------------------
-- Frequencies --
-- --
Value # of Cases % Cumulative %
1 No 1308 40.1 40.1
2 Yes 1955 59.9 100.0
-- --
-- Case Summary --
-- --
Valid Missing Total
# of cases 3263 1171 4434
-- --
-- --
------------------------------------------------------------
# Cross-tabulate prevhyp1 (rows) against sex1 (columns); both names are given
# unquoted and are looked up in fhs.
contingency.tables(row.vars = prevhyp1, col.vars = sex1, data = fhs)
====================================================================================================================
======================================================================================
========== Table: prevhyp1 by sex1 ==========
| sex1
prevhyp1 | Male | Female | Row Total |
-----------------------|-----------|-----------|-----------|
No Count | 1313 | 1691 | 3004 |
Row % | 43.708% | 56.292% | 67.749% |
Column % | 67.541% | 67.912% | |
Total % | 29.612% | 38.137% | |
-----------------------|-----------|-----------|-----------|
Yes Count | 631 | 799 | 1430 |
Row % | 44.126% | 55.874% | 32.251% |
Column % | 32.459% | 32.088% | |
Total % | 14.231% | 18.020% | |
-----------------------|-----------|-----------|-----------|
Column Total | 1944 | 2490 | 4434 |
Column % | 43.843% | 56.157% | |
====================================================================================================================
## 3.
# Frequency table of the logical indicator "bmi1 is greater than 25";
# rows with a missing bmi1 yield NA in the indicator.
frequencies(fhs[["bmi1"]] > 25)
$data
------------------------------------------------------------
-- Frequencies --
-- --
Value # of Cases % Cumulative %
1 FALSE 1993 45.1 45.1
2 TRUE 2422 54.9 100.0
-- --
-- Case Summary --
-- --
Valid Missing Total
# of cases 4415 19 4434
-- --
-- --
------------------------------------------------------------
## 4.
library(ggplot2)
# Scatterplot of bmi2 (y) against bmi1 (x). Written with ggplot() +
# geom_point() because qplot() is deprecated in current ggplot2 releases;
# for two numeric variables this draws the same point layer.
ggplot(fhs, aes(x = bmi1, y = bmi2)) + geom_point()
## 1
# Mean of bmi1, ignoring missing values. The argument is spelled out as
# na.rm = TRUE rather than relying on partial matching of `na` and on the
# reassignable shorthand `T`.
mean(fhs$bmi1, na.rm = TRUE)
[1] 25.85
## 2
library(lattice)
# Density plot of the bmi1 column of fhs, drawn with lattice.
densityplot(fhs$bmi1)
## 3
# Side-by-side boxplots of bmi1, one box per level of sex1.
ggplot(data = fhs, mapping = aes(x = sex1, y = bmi1)) + geom_boxplot()
library(doBy)
# Mean bmi1 within each level of sex1; na.rm = TRUE is forwarded to mean() so
# missing bmi1 values are dropped (TRUE spelled out instead of the
# reassignable shorthand T).
summaryBy(bmi1 ~ sex1, data = fhs, FUN = mean, na.rm = TRUE)
sex1 bmi1.mean
1 Male 26.17
2 Female 25.59
## 6
# Keep only rows where both bmi1 and bmi2 are present, then store the
# change (bmi2 minus bmi1) in a new column bmi.diff.
fhs.bmi <- subset(fhs, !(is.na(bmi1) | is.na(bmi2)))
fhs.bmi$bmi.diff <- with(fhs.bmi, bmi2 - bmi1)
# Average change across the retained rows.
bmi.mean <- mean(fhs.bmi$bmi.diff)
bmi.mean
[1] 0.06783
## 7
# Standard deviation of the bmi2 - bmi1 differences.
bmi.sd <- with(fhs.bmi, sd(bmi.diff))
bmi.sd
[1] 1.802
## 8
# Width of the range of the differences: largest minus smallest value.
with(fhs.bmi, max(bmi.diff) - min(bmi.diff))
[1] 20.93
## 9
# One-sample t-test of the bmi2 - bmi1 differences against a mean of zero
# (t.test's default null value).
t.test(fhs.bmi$bmi.diff)
One Sample t-test
data: fhs.bmi$bmi.diff
t = 2.354, df = 3908, p-value = 0.01862
alternative hypothesis: true mean is not equal to 0
95 percent confidence interval:
0.01134 0.12432
sample estimates:
mean of x
0.06783
# Lower and upper bounds two standard deviations either side of the mean
# difference.
c(bmi.mean - 2 * bmi.sd, bmi.mean + 2 * bmi.sd)
[1] -3.535 3.671