PH207x Week 1

Homework 1 part 2

## Question 2
# Monthly counts pasted as alternating month / count lines: each month appears
# twice in a row, first with the value stored as number1991, then number1992.
dat <- read.table(text = "January
325
January
317
February
312
February
302
March
346
March
339
April
340
April
328
May
355
May
361
June
342
June
333
July
358
July
341
August
346
August
343
September
365
September
359
October
355
October
305
November
324
November
312
December
342
December
321")

# Fold the single 48-value column into 12 rows of
# (month, count, month, count), filling row by row.
dat <- as.matrix(dat)
dat <- matrix(dat, ncol = 4, byrow = TRUE)
dat <- as.data.frame(dat)
names(dat) <- c("mo1991", "number1991", "mo1992", "number1992")
# The counts come back as text (or factors); go through character so the
# printed digits, not factor codes, become the numeric values.
dat[, c(2, 4)] <- lapply(
  dat[, c(2, 4)],
  function(x) as.numeric(as.character(x))
)
# Use the month names as row labels, then keep only the two count columns.
rownames(dat) <- dat[, 1]
dat <- dat[, c(2, 4)]

dat
          number1991 number1992
January          325        317
February         312        302
March            346        339
April            340        328
May              355        361
June             342        333
July             358        341
August           346        343
September        365        359
October          355        305
November         324        312
December         342        321
# Each month's share of its column total (margin = 2 normalises within columns).
prop.table(as.matrix(dat), margin = 2)
          number1991 number1992
January      0.07908    0.08003
February     0.07591    0.07624
March        0.08418    0.08558
April        0.08273    0.08281
May          0.08637    0.09114
June         0.08321    0.08407
July         0.08710    0.08609
August       0.08418    0.08659
September    0.08881    0.09063
October      0.08637    0.07700
November     0.07883    0.07877
December     0.08321    0.08104

Part 3

# Load the dataset with foreign::read.dta from a path relative to the
# working directory; the result is stored as `fhs`.
library(foreign)
fhs <- read.dta("./fhs.2d92301d751b.dta")

head(fhs)
  randid death angina hospmi mi_fchd anychd stroke cvd hyperten timeap timemi timemifc timechd timestrk timecvd
1   2448    No     No    Yes     Yes    Yes     No Yes       No 24.000 17.626   17.626  17.626   24.000  17.626
2   6238    No     No     No      No     No     No  No       No 24.000 24.000   24.000  24.000   24.000  24.000
3   9428    No     No     No      No     No     No  No       No 24.000 24.000   24.000  24.000   24.000  24.000
4  10552   Yes     No     No      No     No    Yes Yes      Yes  8.093  8.093    8.093   8.093    5.719   5.719
5  11252    No     No     No      No     No     No  No      Yes 24.000 24.000   24.000  24.000   24.000  24.000
6  11263    No     No     No     Yes    Yes     No Yes      Yes 24.000 24.000   15.658  15.658   24.000  15.658
  timedth timehyp   sex1 totchol1 age1 sysbp1 diabp1 cursmoke1 cigpday1  bmi1 diabetes1 bpmeds1 heartrte1 glucose1
1  24.000   24.00   Male      195   39  106.0     70        No        0 26.97        No      No        80       77
2  24.000   24.00 Female      250   46  121.0     81        No        0 28.73        No      No        95       76
3  24.000   24.00   Male      245   48  127.5     80       Yes       20 25.34        No      No        75       70
4   8.093    0.00 Female      225   61  150.0     95       Yes       30 28.58        No      No        65      103
5  24.000   11.73 Female      285   46  130.0     84       Yes       23 23.10        No      No        85       85
6  24.000    0.00 Female      228   43  180.0    110        No        0 30.30        No      No        77       99
  prevchd1 prevap1 prevmi1 prevstrk1 prevhyp1 hdlc1 ldlc1   sex2 totchol2 age2 sysbp2 diabp2 cursmoke2 cigpday2
1       No      No      No        No       No    NA    NA   <NA>       NA   NA     NA     NA      <NA>       NA
2       No      No      No        No       No    NA    NA Female      260   52    105   69.5        No        0
3       No      No      No        No       No    NA    NA   Male      283   54    141   89.0       Yes       30
4       No      No      No        No      Yes    NA    NA Female      232   67    183  109.0       Yes       20
5       No      No      No        No       No    NA    NA Female      343   51    109   77.0       Yes       30
6       No      No      No        No      Yes    NA    NA Female      230   49    177  102.0        No        0
   bmi2 diabetes2 bpmeds2 heartrte2 glucose2 prevchd2 prevap2 prevmi2 prevstrk2 prevhyp2 hdlc2 ldlc2   sex3
1    NA      <NA>    <NA>        NA       NA     <NA>    <NA>    <NA>      <NA>     <NA>    NA    NA   Male
2 29.43        No      No        80       86       No      No      No        No       No    NA    NA Female
3 25.34        No      No        75       87       No      No      No        No       No    NA    NA   <NA>
4 30.18        No      No        60       89       No      No      No        No      Yes    NA    NA   <NA>
5 23.48        No      No        90       72       No      No      No        No       No    NA    NA Female
6 31.36        No     Yes       120       86       No      No      No        No      Yes    NA    NA Female
  totchol3 age3 sysbp3 diabp3 cursmoke3 cigpday3  bmi3 diabetes3 bpmeds3 heartrte3 glucose3 prevchd3 prevap3
1      209   52    121     66        No        0    NA        No      No        69       92       No      No
2      237   58    108     66        No        0 28.50        No      No        80       71       No      No
3       NA   NA     NA     NA      <NA>       NA    NA      <NA>    <NA>        NA       NA     <NA>    <NA>
4       NA   NA     NA     NA      <NA>       NA    NA      <NA>    <NA>        NA       NA     <NA>    <NA>
5       NA   58    155     90       Yes       30 24.61        No      No        74       NA       No      No
6      220   55    180    106        No        0 31.17       Yes     Yes        86       81       No      No
  prevmi3 prevstrk3 prevhyp3 hdlc3 ldlc3
1      No        No       No    31   178
2      No        No       No    54   141
3    <NA>      <NA>     <NA>    NA    NA
4    <NA>      <NA>     <NA>    NA    NA
5      No        No      Yes    NA    NA
6      No        No      Yes    46   135

## 2.
library(Deducer)
Note: On Mac OS X we strongly recommend using iplots from within JGR.
Proceed at your own risk as iplots cannot resolve potential ev.loop deadlocks.
'Yes' is assumed for all dialogs as they cannot be shown without a deadlock,
also ievent.wait() is disabled.
# Frequency tables for every column whose name contains "hyp" followed by a digit.
frequencies(fhs[, grepl("hyp[0-9]", names(fhs))])
$prevhyp1
------------------------------------------------------------
--                        Frequencies                     --
--                                                        --
  Value # of Cases       % Cumulative %
1    No       3004    67.7         67.7
2   Yes       1430    32.3        100.0
--                                                        --
--                        Case Summary                    --
--                                                        --
           Valid Missing Total
# of cases  4434       0  4434
--                                                        --
--                                                        --
------------------------------------------------------------






$prevhyp2
------------------------------------------------------------
--                        Frequencies                     --
--                                                        --
  Value # of Cases       % Cumulative %
1    No       1971    50.2         50.2
2   Yes       1959    49.8        100.0
--                                                        --
--                        Case Summary                    --
--                                                        --
           Valid Missing Total
# of cases  3930     504  4434
--                                                        --
--                                                        --
------------------------------------------------------------






$prevhyp3
------------------------------------------------------------
--                        Frequencies                     --
--                                                        --
  Value # of Cases       % Cumulative %
1    No       1308    40.1         40.1
2   Yes       1955    59.9        100.0
--                                                        --
--                        Case Summary                    --
--                                                        --
           Valid Missing Total
# of cases  3263    1171  4434
--                                                        --
--                                                        --
------------------------------------------------------------

# Cross-tabulate prevhyp1 (rows) against sex1 (columns).
contingency.tables(row.vars = prevhyp1, col.vars = sex1, data = fhs)
====================================================================================================================

               ======================================================================================               
                                   ========== Table: prevhyp1 by sex1 ==========                                   
                       | sex1 
              prevhyp1 |     Male  |   Female  | Row Total | 
-----------------------|-----------|-----------|-----------|
          No  Count    |     1313  |     1691  |     3004  | 
              Row %    |   43.708% |   56.292% |   67.749% | 
              Column % |   67.541% |   67.912% |           | 
              Total %  |   29.612% |   38.137% |           | 
-----------------------|-----------|-----------|-----------|
         Yes  Count    |      631  |      799  |     1430  | 
              Row %    |   44.126% |   55.874% |   32.251% | 
              Column % |   32.459% |   32.088% |           | 
              Total %  |   14.231% |   18.020% |           | 
-----------------------|-----------|-----------|-----------|
          Column Total |     1944  |     2490  |     4434  | 
              Column % |   43.843% |   56.157% |           | 


====================================================================================================================

## 3.
# Tabulate the logical indicator "bmi1 greater than 25".
frequencies(fhs$bmi1 > 25)
$data
------------------------------------------------------------
--                        Frequencies                     --
--                                                        --
  Value # of Cases       % Cumulative %
1 FALSE       1993    45.1         45.1
2  TRUE       2422    54.9        100.0
--                                                        --
--                        Case Summary                    --
--                                                        --
           Valid Missing Total
# of cases  4415      19  4434
--                                                        --
--                                                        --
------------------------------------------------------------


## 4.
library(ggplot2)
# Scatter plot of bmi2 against bmi1. Written with ggplot() + geom_point()
# because qplot() is deprecated in current ggplot2 releases.
ggplot(fhs, aes(x = bmi1, y = bmi2)) + geom_point()

plot of chunk unnamed-chunk-3

Part 4

## 1
# Mean of bmi1 with missing values dropped. The argument is spelled out in
# full rather than relying on partial matching of `na` and the `T` alias.
mean(fhs$bmi1, na.rm = TRUE)
[1] 25.85

## 2
# Kernel density plot of bmi1 using lattice.
library(lattice)
densityplot(fhs$bmi1)

plot of chunk unnamed-chunk-4


## 3
# Box plots of bmi1, one box per level of sex1.
ggplot(data = fhs, mapping = aes(x = sex1, y = bmi1)) +
  geom_boxplot()

plot of chunk unnamed-chunk-4

library(doBy)
# Mean of bmi1 within each level of sex1; na.rm is forwarded to mean() so
# missing values are dropped.
summaryBy(bmi1 ~ sex1, data = fhs, FUN = mean, na.rm = TRUE)
    sex1 bmi1.mean
1   Male     26.17
2 Female     25.59

## 6
# Keep only the rows where both bmi1 and bmi2 are present.
has_both_bmi <- !(is.na(fhs$bmi1) | is.na(fhs$bmi2))
fhs.bmi <- fhs[has_both_bmi, ]

# Change in BMI between the two measurements (second minus first).
fhs.bmi$bmi.diff <- fhs.bmi$bmi2 - fhs.bmi$bmi1

bmi.mean <- mean(fhs.bmi$bmi.diff)
bmi.mean
[1] 0.06783

## 7
# Standard deviation of the BMI differences.
bmi.sd <- with(fhs.bmi, sd(bmi.diff))
bmi.sd
[1] 1.802

## 8
# Width of the range of BMI differences (largest minus smallest).
max(fhs.bmi$bmi.diff) - min(fhs.bmi$bmi.diff)
[1] 20.93

## 9
# One-sample t-test of the BMI differences (default null: true mean equals 0).
t.test(fhs.bmi$bmi.diff)

    One Sample t-test

data:  fhs.bmi$bmi.diff 
t = 2.354, df = 3908, p-value = 0.01862
alternative hypothesis: true mean is not equal to 0 
95 percent confidence interval:
 0.01134 0.12432 
sample estimates:
mean of x 
  0.06783 


# Interval from two standard deviations below to two above the mean difference.
bmi.mean + c(-2, 2) * bmi.sd
[1] -3.535  3.671