# Final Project Part 1 for PH251D

#1. Run a program file (filename1.R) using the 'source' command;

## 1
# Run the script and echo each of its expressions before the result it prints.
source(file = "/Users/Julia/Documents/sourcefile.R", echo = TRUE)

## 
## > UCBAdmissions
## , , Dept = A
## 
##           Gender
## Admit      Male Female
##   Admitted  512     89
##   Rejected  313     19
## 
## , , Dept = B
## 
##           Gender
## Admit      Male Female
##   Admitted  353     17
##   Rejected  207      8
## 
## , , Dept = C
## 
##           Gender
## Admit      Male Female
##   Admitted  120    202
##   Rejected  205    391
## 
## , , Dept = D
## 
##           Gender
## Admit      Male Female
##   Admitted  138    131
##   Rejected  279    244
## 
## , , Dept = E
## 
##           Gender
## Admit      Male Female
##   Admitted   53     94
##   Rejected  138    299
## 
## , , Dept = F
## 
##           Gender
## Admit      Male Female
##   Admitted   22     24
##   Rejected  351    317
## 
## 
## > str(UCBAdmissions)
##  table [1:2, 1:2, 1:6] 512 313 89 19 353 207 17 8 120 205 ...
##  - attr(*, "dimnames")=List of 3
##   ..$ Admit : chr [1:2] "Admitted" "Rejected"
##   ..$ Gender: chr [1:2] "Male" "Female"
##   ..$ Dept  : chr [1:6] "A" "B" "C" "D" ...

#2. Demonstrate reading an ASCII data file (filename2.dat) to create a 'data frame';

## 2
# Read the delimited case listing into a data frame and show its first rows.
# The first line of the file supplies the column names, character columns are
# kept as character (as.is = TRUE), and the strings "." and "Unknown" are read
# as missing values.
# A period cannot be the field separator: "." is listed as a missing-value
# marker and the column names themselves contain periods (date.onset,
# date.tested), so the fields are split on commas instead.
# NOTE(review): assumes the file is comma-delimited -- confirm against the
# data file.
problem2 <- read.table("/Users/Julia/Documents/problem2datafile.dat", header = TRUE,
    sep = ",", as.is = TRUE, na.strings = c(".", "Unknown"))
head(problem2)

##   id         county age sex syndrome date.onset date.tested death
## 1  1 San Bernardino  40   F      WNF 05/19/2004  06/02/2004    No
## 2  2 San Bernardino  64   F      WNF 05/22/2004  06/16/2004    No
## 3  3 San Bernardino  19   M      WNF 05/22/2004  06/16/2004    No
## 4  4 San Bernardino  12   M      WNF 05/16/2004  06/16/2004    No
## 5  5 San Bernardino  12   M      WNF 05/14/2004  06/16/2004    No
## 6  6 San Bernardino  17   M      WNF 06/07/2004  06/17/2004    No
##   date.onset.international date.tested.international
## 1               2004-05-19                2004-06-02
## 2               2004-05-22                2004-06-16
## 3               2004-05-22                2004-06-16
## 4               2004-05-16                2004-06-16
## 5               2004-05-14                2004-06-16
## 6               2004-06-07                2004-06-17

#3. Demonstrate simple data manipulation (e.g., variable transformation, recoding, etc.);

## 3
# Load the built-in ToothGrowth data and append len.centered: each tooth
# length minus the mean length over all rows.
data(ToothGrowth)
ToothGrowth$len.centered <- ToothGrowth$len - mean(ToothGrowth$len)
ToothGrowth

##     len supp dose len.centered
## 1   4.2   VC  0.5    -14.61333
## 2  11.5   VC  0.5     -7.31333
## 3   7.3   VC  0.5    -11.51333
## 4   5.8   VC  0.5    -13.01333
## 5   6.4   VC  0.5    -12.41333
## 6  10.0   VC  0.5     -8.81333
## 7  11.2   VC  0.5     -7.61333
## 8  11.2   VC  0.5     -7.61333
## 9   5.2   VC  0.5    -13.61333
## 10  7.0   VC  0.5    -11.81333
## 11 16.5   VC  1.0     -2.31333
## 12 16.5   VC  1.0     -2.31333
## 13 15.2   VC  1.0     -3.61333
## 14 17.3   VC  1.0     -1.51333
## 15 22.5   VC  1.0      3.68667
## 16 17.3   VC  1.0     -1.51333
## 17 13.6   VC  1.0     -5.21333
## 18 14.5   VC  1.0     -4.31333
## 19 18.8   VC  1.0     -0.01333
## 20 15.5   VC  1.0     -3.31333
## 21 23.6   VC  2.0      4.78667
## 22 18.5   VC  2.0     -0.31333
## 23 33.9   VC  2.0     15.08667
## 24 25.5   VC  2.0      6.68667
## 25 26.4   VC  2.0      7.58667
## 26 32.5   VC  2.0     13.68667
## 27 26.7   VC  2.0      7.88667
## 28 21.5   VC  2.0      2.68667
## 29 23.3   VC  2.0      4.48667
## 30 29.5   VC  2.0     10.68667
## 31 15.2   OJ  0.5     -3.61333
## 32 21.5   OJ  0.5      2.68667
## 33 17.6   OJ  0.5     -1.21333
## 34  9.7   OJ  0.5     -9.11333
## 35 14.5   OJ  0.5     -4.31333
## 36 10.0   OJ  0.5     -8.81333
## 37  8.2   OJ  0.5    -10.61333
## 38  9.4   OJ  0.5     -9.41333
## 39 16.5   OJ  0.5     -2.31333
## 40  9.7   OJ  0.5     -9.11333
## 41 19.7   OJ  1.0      0.88667
## 42 23.3   OJ  1.0      4.48667
## 43 23.6   OJ  1.0      4.78667
## 44 26.4   OJ  1.0      7.58667
## 45 20.0   OJ  1.0      1.18667
## 46 25.2   OJ  1.0      6.38667
## 47 25.8   OJ  1.0      6.98667
## 48 21.2   OJ  1.0      2.38667
## 49 14.5   OJ  1.0     -4.31333
## 50 27.3   OJ  1.0      8.48667
## 51 25.5   OJ  2.0      6.68667
## 52 26.4   OJ  2.0      7.58667
## 53 22.4   OJ  2.0      3.58667
## 54 24.5   OJ  2.0      5.68667
## 55 24.8   OJ  2.0      5.98667
## 56 30.9   OJ  2.0     12.08667
## 57 26.4   OJ  2.0      7.58667
## 58 27.3   OJ  2.0      8.48667
## 59 29.4   OJ  2.0     10.58667
## 60 23.0   OJ  2.0      4.18667

#4. Demonstrate the use of calendar and Julian dates;

## 4
# Birthdays entered as month/day/year text.
FamilyFriendBday <- c(
    "1/5/1989", "2/20/1988", "3/7/1947", "3/16/1910", "4/19/1988",
    "6/15/1988", "8/2/1947", "9/11/2013", "9/22/1989", "9/23/1988",
    "10/26/1988", "10/30/1987", "11/13/1988", "12/10/1987"
)

# Parse the text into Date values (printed as yyyy-mm-dd).
FamilyFriendBday.julian <- as.Date(FamilyFriendBday, format = "%m/%d/%Y")

# Day counts relative to julian()'s default origin of 1970-01-01; dates
# before the origin come out negative.
Julian <- julian(FamilyFriendBday.julian)

# Show the three representations side by side.
FamilyFriendBdaydata <- data.frame(
    DOB = FamilyFriendBday,
    `DOB Standard Format` = FamilyFriendBday.julian,
    Julian = Julian
)
FamilyFriendBdaydata

##           DOB DOB.Standard.Format Julian
## 1    1/5/1989          1989-01-05   6944
## 2   2/20/1988          1988-02-20   6624
## 3    3/7/1947          1947-03-07  -8336
## 4   3/16/1910          1910-03-16 -21841
## 5   4/19/1988          1988-04-19   6683
## 6   6/15/1988          1988-06-15   6740
## 7    8/2/1947          1947-08-02  -8188
## 8   9/11/2013          2013-09-11  15959
## 9   9/22/1989          1989-09-22   7204
## 10  9/23/1988          1988-09-23   6840
## 11 10/26/1988          1988-10-26   6873
## 12 10/30/1987          1987-10-30   6511
## 13 11/13/1988          1988-11-13   6891
## 14 12/10/1987          1987-12-10   6552

#5. Conduct a simple analysis using existing functions (from R, colleagues, etc.);

## 5
# 2x2 table of counts, filled column by column: rows are lake-swimming
# status, columns are schistosomiasis test result.
schistodata <- matrix(
    data = c(347, 13, 467, 38),
    nrow = 2,
    ncol = 2,
    dimnames = list(
        `Swam in lake` = c("Yes", "No"),
        Schistosomiasis = c("Positive", "Negative")
    )
)
schistodata

##             Schistosomiasis
## Swam in lake Positive Negative
##          Yes      347      467
##          No        13       38

# Attach epitools and estimate the odds ratio, with a Wald confidence
# interval, for the 2x2 swimming-by-infection table.
# NOTE(review): in the printed result the "Yes" row is the reference (1.000)
# and the estimate sits on the "No" row. Its value equals the cross-product
# ratio (347 * 38)/(467 * 13), so the number is the same as the odds ratio of
# a positive test for swimmers versus non-swimmers. Presumably epitools takes
# the first row and first column as reference levels -- confirm, and consider
# the function's rev argument if the labels should read the other way round.
library(epitools)
oddsratio.wald(schistodata)

## $data
##             Schistosomiasis
## Swam in lake Positive Negative Total
##        Yes        347      467   814
##        No          13       38    51
##        Total      360      505   865
## 
## $measure
##             odds ratio with 95% C.I.
## Swam in lake estimate lower upper
##          Yes    1.000    NA    NA
##          No     2.172  1.14 4.139
## 
## $p.value
##             two-sided
## Swam in lake midp.exact fisher.exact chi.square
##          Yes         NA           NA         NA
##          No     0.01483      0.01854    0.01601
## 
## $correction
## [1] FALSE
## 
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"

#6. Conduct a simple analysis demonstrating simple programming (e.g., a 'for' loop);

## 6
# Add a "Total" column (sum across each row), then a "Total" row (sum down
# each column, including the new Total column), and restore the dimension
# labels that cbind/rbind drop.
schistototals <- rowSums(schistodata)
schistodata2 <- cbind(schistodata, Total = schistototals)
swamtotals <- colSums(schistodata2)
schistodatafinal <- rbind(schistodata2, Total = swamtotals)
names(dimnames(schistodatafinal)) <- c("Swam in lake", "Schistosomiasis")
schistodatafinal

##             Schistosomiasis
## Swam in lake Positive Negative Total
##        Yes        347      467   814
##        No          13       38    51
##        Total      360      505   865

# Express every entry of the table with margins (cell counts and totals) as a
# proportion of the grand total. The matrix is visited column by column.
# The denominator is read from the table's Total/Total cell instead of being
# typed in by hand, and each proportion is printed on its own line so that
# neighbouring values do not run together.
grandtotal <- schistodatafinal["Total", "Total"]
for (i in schistodatafinal) {
    cat(i/grandtotal, "\n", sep = "")
}

## 0.4012
## 0.01503
## 0.4162
## 0.5399
## 0.04393
## 0.5838
## 0.941
## 0.05896
## 1

#7. Conduct a simple analysis demonstrating an original function created by student;

## 7
#' Cumulative incidence ratio and attributable risk from a 2x2 table
#'
#' Reads the four cells in the top-left 2x2 corner of `x`. Row 1 is the group
#' of interest and row 2 the comparison group; column 1 holds the event counts
#' and column 2 the non-event counts. The risk in each row is
#' column 1 / (column 1 + column 2).
#'
#' @param x A two-dimensional table of counts (matrix, table or data frame)
#'   with at least 2 rows and 2 columns.
#' @return A list with elements `data` (the input, unchanged),
#'   `Cumulative.Incidence.Ratio` (risk in row 1 divided by risk in row 2),
#'   `Attributable.Risk` (risk in row 1 minus risk in row 2) and
#'   `Attributable.Risk.Percent` ((ratio - 1) / ratio * 100).
ARandCIR <- function(x) {
    # Fail with a clear message instead of an indexing error when x is not a
    # two-dimensional table.
    if (length(dim(x)) != 2L || any(dim(x) < 2L)) {
        stop("x must be a two-dimensional table with at least 2 rows and 2 columns",
            call. = FALSE)
    }
    risk.row1 <- x[1, 1]/(x[1, 1] + x[1, 2])
    risk.row2 <- x[2, 1]/(x[2, 1] + x[2, 2])
    rr <- risk.row1/risk.row2
    ar <- risk.row1 - risk.row2
    arpercent <- ((rr - 1)/rr) * 100
    list(data = x, Cumulative.Incidence.Ratio = rr, Attributable.Risk = ar,
        Attributable.Risk.Percent = arpercent)
}
ARandCIR(schistodata)

## $data
##             Schistosomiasis
## Swam in lake Positive Negative
##          Yes      347      467
##          No        13       38
## 
## $Cumulative.Incidence.Ratio
## [1] 1.672
## 
## $Attributable.Risk
## [1] 0.1714
## 
## $Attributable.Risk.Percent
## [1] 40.2

#8. Create a simple graph with title, axes labels and legend, and output to file;

## 8
# Draw the three monthly death series on one set of axes, then add a legend
# whose colours follow the same order as the series.
# NOTE(review): the task heading asks for output to a file, but no file
# graphics device (pdf(), png(), ...) is opened here -- confirm how the figure
# is meant to be saved.
ts.plot(
    ldeaths, mdeaths, fdeaths,
    gpars = list(
        main = "Monthly Deaths from Lung Diseases in the UK, 1974-1979",
        xlab = "year",
        ylab = "deaths",
        col = c("black", "skyblue", "pink"),
        lty = 1,
        lwd = 2
    )
)
legend(
    x = 1974, y = 4000,
    legend = c("both genders", "male", "female"),
    col = c("black", "skyblue", "pink"),
    lty = 1,
    lwd = 2
)

# plot of chunk unnamed-chunk-8

#9. Demonstrate the use of regular expressions;

## 9
# Pair each built-in state name with its region, one row per state.
state <- data.frame(state.name = state.name, state.region = state.region)
state

##        state.name  state.region
## 1         Alabama         South
## 2          Alaska          West
## 3         Arizona          West
## 4        Arkansas         South
## 5      California          West
## 6        Colorado          West
## 7     Connecticut     Northeast
## 8        Delaware         South
## 9         Florida         South
## 10        Georgia         South
## 11         Hawaii          West
## 12          Idaho          West
## 13       Illinois North Central
## 14        Indiana North Central
## 15           Iowa North Central
## 16         Kansas North Central
## 17       Kentucky         South
## 18      Louisiana         South
## 19          Maine     Northeast
## 20       Maryland         South
## 21  Massachusetts     Northeast
## 22       Michigan North Central
## 23      Minnesota North Central
## 24    Mississippi         South
## 25       Missouri North Central
## 26        Montana          West
## 27       Nebraska North Central
## 28         Nevada          West
## 29  New Hampshire     Northeast
## 30     New Jersey     Northeast
## 31     New Mexico          West
## 32       New York     Northeast
## 33 North Carolina         South
## 34   North Dakota North Central
## 35           Ohio North Central
## 36       Oklahoma         South
## 37         Oregon          West
## 38   Pennsylvania     Northeast
## 39   Rhode Island     Northeast
## 40 South Carolina         South
## 41   South Dakota North Central
## 42      Tennessee         South
## 43          Texas         South
## 44           Utah          West
## 45        Vermont     Northeast
## 46       Virginia         South
## 47     Washington          West
## 48  West Virginia         South
## 49      Wisconsin North Central
## 50        Wyoming          West

# Keep the rows whose region matches the pattern "West".
western.states <- state[grepl(pattern = "West", x = state$state.region), ]
western.states

##    state.name state.region
## 2      Alaska         West
## 3     Arizona         West
## 5  California         West
## 6    Colorado         West
## 11     Hawaii         West
## 12      Idaho         West
## 26    Montana         West
## 28     Nevada         West
## 31 New Mexico         West
## 37     Oregon         West
## 44       Utah         West
## 47 Washington         West
## 50    Wyoming         West

# Abbreviate the region label "West" to "W"; other regions are left as they
# are. Then pair the relabelled regions with the state names.
newstate <- gsub(pattern = "West", replacement = "W", x = state$state.region)
state2 <- data.frame(state.name = state.name, newstate = newstate)
state2

##        state.name      newstate
## 1         Alabama         South
## 2          Alaska             W
## 3         Arizona             W
## 4        Arkansas         South
## 5      California             W
## 6        Colorado             W
## 7     Connecticut     Northeast
## 8        Delaware         South
## 9         Florida         South
## 10        Georgia         South
## 11         Hawaii             W
## 12          Idaho             W
## 13       Illinois North Central
## 14        Indiana North Central
## 15           Iowa North Central
## 16         Kansas North Central
## 17       Kentucky         South
## 18      Louisiana         South
## 19          Maine     Northeast
## 20       Maryland         South
## 21  Massachusetts     Northeast
## 22       Michigan North Central
## 23      Minnesota North Central
## 24    Mississippi         South
## 25       Missouri North Central
## 26        Montana             W
## 27       Nebraska North Central
## 28         Nevada             W
## 29  New Hampshire     Northeast
## 30     New Jersey     Northeast
## 31     New Mexico             W
## 32       New York     Northeast
## 33 North Carolina         South
## 34   North Dakota North Central
## 35           Ohio North Central
## 36       Oklahoma         South
## 37         Oregon             W
## 38   Pennsylvania     Northeast
## 39   Rhode Island     Northeast
## 40 South Carolina         South
## 41   South Dakota North Central
## 42      Tennessee         South
## 43          Texas         South
## 44           Utah             W
## 45        Vermont     Northeast
## 46       Virginia         South
## 47     Washington             W
## 48  West Virginia         South
## 49      Wisconsin North Central
## 50        Wyoming             W

#10. Demonstrate the use of the 'sink' function to generate an output file;

## 10
# Start diverting R output to the log file, then run the script with echo so
# that both its expressions and their results are written.
sink(file = "/Users/Julia/Documents/sinkfile.log")
source(file = "/Users/Julia/Documents/sourcefile.R", echo = TRUE)

## 
## > UCBAdmissions
## , , Dept = A
## 
##           Gender
## Admit      Male Female
##   Admitted  512     89
##   Rejected  313     19
## 
## , , Dept = B
## 
##           Gender
## Admit      Male Female
##   Admitted  353     17
##   Rejected  207      8
## 
## , , Dept = C
## 
##           Gender
## Admit      Male Female
##   Admitted  120    202
##   Rejected  205    391
## 
## , , Dept = D
## 
##           Gender
## Admit      Male Female
##   Admitted  138    131
##   Rejected  279    244
## 
## , , Dept = E
## 
##           Gender
## Admit      Male Female
##   Admitted   53     94
##   Rejected  138    299
## 
## , , Dept = F
## 
##           Gender
## Admit      Male Female
##   Admitted   22     24
##   Rejected  351    317
## 
## 
## > str(UCBAdmissions)
##  table [1:2, 1:2, 1:6] 512 313 89 19 353 207 17 8 120 205 ...
##  - attr(*, "dimnames")=List of 3
##   ..$ Admit : chr [1:2] "Admitted" "Rejected"
##   ..$ Gender: chr [1:2] "Male" "Female"
##   ..$ Dept  : chr [1:6] "A" "B" "C" "D" ...

# Calling sink() with no arguments ends the most recent output diversion.
sink()

## I used the sink function to divert R output to a file (named
## sinkfile.log), ran sourcefile.R, and then called sink() again to stop the
## diversion. The output file contains everything that sourcing sourcefile.R
## with echo = TRUE prints.