PH 251D - Final Project 1

Allison Gonzales

Run a program file (filename1.R) using the 'source' command;

source("C:/Users/Allie/Documents/MPH Fall 2013/PH 251D/RProj/filename1.R")

Demonstrate reading an ASCII data file (filename2.dat) to create a 'data frame';

df1 <- read.table("c:/users/Allie/Documents/MPH Fall 2013/PH 251D/RProj/filename2.dat")

Demonstrate simple data manipulation (e.g., variable transformation, recoding, etc.);

data(UKLungDeaths)
# Labels for the four death-count categories, in the same order as the
# intervals defined by the breaks below.
totallabels <- c("<1000", "1000-1999", "2000-2999", ">=3000")
# Recode the ldeaths totals into labelled categories.
# - The Inf upper break keeps totals of 3000 or more from turning into NA.
# - right = FALSE makes each interval [lower, upper), so e.g. exactly 1000
#   falls in "1000-1999", matching the label text.
totaldeathcat <- cut(
  ldeaths,
  breaks = c(0, 1000, 2000, 3000, Inf),
  labels = totallabels,
  right = FALSE
)
totallabels

## [1] "<1000"     "1000-1999" "2000-2999" ">=3000"

Demonstrate the use of calendar and Julian dates;

# Birthday written as a month/day/year character string.
my.bday <- "09/11/1987"
# Parse the string with the matching format; as.Date() returns a Date object,
# which prints in ISO year-month-day form.
# NOTE(review): despite the name, this holds a calendar Date rather than a
# Julian day count; julian(my.bday.julian) would give the number of days since
# the origin (1970-01-01 by default) - confirm which one is intended.
my.bday.julian <- as.Date(my.bday, format = "%m/%d/%Y")
my.bday.julian

## [1] "1987-09-11"

Conduct a simple analysis using existing functions (from R, colleagues, etc.);

mean(ldeaths)

## [1] 2057

mean(fdeaths)

## [1] 560.7

mean(mdeaths)

## [1] 1496

t.test(fdeaths, mdeaths)

## 
##  Welch Two Sample t-test
## 
## data:  fdeaths and mdeaths
## t = -16.92, df = 94.74, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1045.0  -825.5
## sample estimates:
## mean of x mean of y 
##     560.7    1495.9

Conduct a simple analysis demonstrating simple programming (e.g., a 'for' loop);

# Walk through the ldeaths values one at a time and print a logical flag
# for each: TRUE when the value is under 2000, FALSE otherwise.
for (i in ldeaths) {
  print(2000 > i)
}

## [1] FALSE
## [1] FALSE
## [1] FALSE
## [1] FALSE
## [1] FALSE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] FALSE
## [1] FALSE
## [1] FALSE
## [1] FALSE
## [1] FALSE
## [1] FALSE
## [1] FALSE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] FALSE
## [1] FALSE
## [1] FALSE
## [1] FALSE
## [1] FALSE
## [1] FALSE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] FALSE
## [1] FALSE
## [1] FALSE
## [1] FALSE
## [1] FALSE
## [1] FALSE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] FALSE
## [1] FALSE
## [1] FALSE
## [1] FALSE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] FALSE
## [1] FALSE
## [1] FALSE
## [1] FALSE
## [1] FALSE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] TRUE
## [1] TRUE

Conduct a simple analysis demonstrating an original function created by student;

dat <- matrix(c(23, 45, 37, 24), 2, 2)
dat

##      [,1] [,2]
## [1,]   23   37
## [2,]   45   24

# Compute the risk ratio from a 2x2 table of counts.
#
# Arguments:
#   x  A 2x2 table (matrix, table or data frame) indexed as
#        x[1, 1] = a   x[1, 2] = b
#        x[2, 1] = c   x[2, 2] = d
#
# Returns:
#   A list with two elements:
#     data        the table that was passed in
#     risk.ratio  (a / (a + b)) / (c / (c + d))
#
# The calculation uses the argument `x`; it no longer reads the global
# object `dat`, so the result reflects whatever table the caller supplies.
riskratio <- function(x) {
  # Fail early with a clear message rather than silently using part of a
  # larger table.
  stopifnot(length(dim(x)) == 2L, all(dim(x) == 2L))

  n_a <- x[1, 1]
  n_b <- x[1, 2]
  n_c <- x[2, 1]
  n_d <- x[2, 2]

  risk_row1 <- n_a / (n_a + n_b)
  risk_row2 <- n_c / (n_c + n_d)

  list(data = x, risk.ratio = risk_row1 / risk_row2)
}

Create a simple graph with title, axes labels and legend, and output to file;

# Line plot of the ldeaths time series with axis labels.
plot(ldeaths, type = "l", col = "green", xlab = "Time (Years)", ylab = "Deaths")
# Range from 0 up to the largest ldeaths value (kept for any later use).
max_range <- range(0, ldeaths)
# ldeaths is a monthly series, so the title says "Monthly" rather than "Yearly".
title(main = "Monthly Deaths from Lung Disease in UK")
# Place the legend by keyword: the x axis is in calendar years, so the old
# x = 1 position fell outside the plotting region. lty = 1 is required for
# the key to actually draw the green line sample.
legend("topright", legend = "total", col = "green", lty = 1)
# NOTE(review): the section heading mentions output to file, but no graphics
# device (e.g. png()/pdf() ... dev.off()) is opened here - confirm whether
# the file output is handled elsewhere.

plot of chunk unnamed-chunk-8

Demonstrate the use of regular expressions;

# Shopping list used to demonstrate regular-expression matching.
groceries <- c(
  "milk", "yogurt", "turkey", "shrimp", "cereal",
  "oatmeal", "sugar", "eggs", "shortening"
)
# Items containing the letter "s" anywhere; value = TRUE returns the matching
# elements themselves instead of their positions.
grep("s", groceries, value = TRUE)

## [1] "shrimp"     "sugar"      "eggs"       "shortening"

# Items where some character is immediately followed by "s" ("." matches any
# single character); value = TRUE returns the matching elements directly.
grep(".s", groceries, value = TRUE)

## [1] "eggs"

Demonstrate the use of the 'sink' function to generate an output file;

# Divert console output to a log file while the program file runs.
sink("C:/Users/Allie/Documents/MPH Fall 2013/PH 251D/RProj/filename1.log")
# echo = TRUE makes source() print each expression and its auto-printed
# result; without it only explicit print()/cat() output would reach the log.
# max.deparse.length = Inf keeps long expressions from being truncated in the
# echoed output. The finally clause restores normal console output even if
# the sourced script stops with an error.
tryCatch(
  source(
    "C:/Users/Allie/Documents/MPH Fall 2013/PH 251D/RProj/filename1.R",
    echo = TRUE,
    max.deparse.length = Inf
  ),
  finally = sink()
)