# Kelsi Perttula PH 251D September 16, 2013
# Problems 1.1. If you have not done so already, install R on your personal
# computer. What is the R workspace file on your operating systems? What is
# the file path to your R workspace file? What is the name of this workspace
# file?
"/User/kelsi/Dropbox/PH251D/HW1.R is the path to my workspace, HW1.R"
## [1] "/User/kelsi/Dropbox/PH251D/HW1.R is the path to my workspace, HW1.R"
save.image() #is how I save the workspace
# R file- HW1.R
getwd() #is how you know what directory your workspace was saved in
## [1] "/Users/kelsi/Dropbox/PH251D"
##
# 1.2. By default, which R packages come already loaded? What are the file
# paths to the default R packages?
searchpaths() #displays the paths of the auto-lodaed packages
## [1] ".GlobalEnv"
## [2] "/Library/Frameworks/R.framework/Versions/3.0/Resources/library/knitr"
## [3] "/Library/Frameworks/R.framework/Versions/3.0/Resources/library/stats"
## [4] "/Library/Frameworks/R.framework/Versions/3.0/Resources/library/graphics"
## [5] "/Library/Frameworks/R.framework/Versions/3.0/Resources/library/grDevices"
## [6] "/Library/Frameworks/R.framework/Versions/3.0/Resources/library/utils"
## [7] "/Library/Frameworks/R.framework/Versions/3.0/Resources/library/datasets"
## [8] "/Library/Frameworks/R.framework/Versions/3.0/Resources/library/methods"
## [9] "Autoloads"
## [10] "/Library/Frameworks/R.framework/Resources/library/base"
# 1.3. List all the object in the current workspace. If there are none,
# create some data objects. Using one expression, remove all the objects in
# the current workspace.
x <- 5
y <- c(4, 6, 7, 2)
ls()
## [1] "x" "y"
rm(list = ls())
ls()
## character(0)
# 1.4. One inch equals 2.54 centimeters. Correct the following R code and
# create a conversion table.
inches <- 1:12
centimeters <- inches * 2.54 #switching from '/' to '*' fixed the table
z = cbind(inches, centimeters)
z
## inches centimeters
## [1,] 1 2.54
## [2,] 2 5.08
## [3,] 3 7.62
## [4,] 4 10.16
## [5,] 5 12.70
## [6,] 6 15.24
## [7,] 7 17.78
## [8,] 8 20.32
## [9,] 9 22.86
## [10,] 10 25.40
## [11,] 11 27.94
## [12,] 12 30.48
# 1.5. To convert between temperatures in degrees Celsius (C) and Farenheit
# (F), we use the following conversion formulas: C = (F −32)(5/9) F = (9/5)C
# +32
# At standard temperature and pressure, the freezing and boiling points of
# water are 0 and 100 degrees Celsius, respectively. What are the freezing
# and boiling points of water in degrees Fahrenheit?
mpF <- (9/5) * (0) + 32
mpF
## [1] 32
bpF <- (9/5) * 100 + 32
bpF
## [1] 212
# 1.6. For the Celsius temperatures 0, 5, 10, 15, 20, 25, ..., 80, 85, 90,
# 95, 100, construct a conversion table that displays the corresponding
# Fahrenheit temperatures. Hint: to create the sequence of Celsius
# temperatures use seq(0, 100, 5).
Ctemp <- seq(0, 100, 5)
Ftemp <- Ctemp * (9/5) + 32
CFConversionTable <- cbind(Ctemp, Ftemp)
CFConversionTable
## Ctemp Ftemp
## [1,] 0 32
## [2,] 5 41
## [3,] 10 50
## [4,] 15 59
## [5,] 20 68
## [6,] 25 77
## [7,] 30 86
## [8,] 35 95
## [9,] 40 104
## [10,] 45 113
## [11,] 50 122
## [12,] 55 131
## [13,] 60 140
## [14,] 65 149
## [15,] 70 158
## [16,] 75 167
## [17,] 80 176
## [18,] 85 185
## [19,] 90 194
## [20,] 95 203
## [21,] 100 212
# 1.7. BMI is a reliable indicator of total body fat, which is related to
# the risk of disease and death. The score is valid for both men and women
# but it does have some limits. BMI does have some limitations: it may
# overestimate body fat in athletes and others who have a muscular build, it
# may underestimate body fat in older persons and others who have lost
# muscle mass. Table 1.5 Body mass index classification BMI Classification
# < 18.5 Underweight [18.5,25) Normal weight [25,30) Overweight \025 30 Obesity
# Applied Epidemiology Using R 5-Sep-2013 Body Mass Index (BMI) is
# calculated from your weight in kilograms and height in meters: BMI =kg/m^2
# pretend person:
lb = 125 #pounds
ft = 5.5 # 5'6'
m <- lb/2.2
h <- ft/3.3
m/(h^2)
## [1] 20.45
# 1kg = 2.2lb 1m = 3.3ft Calculate your BMI (don’t report it to us). 1.8.
# Using Table 1.1 on page 8, explain in words, and use R to illustrate, the
# difference between modulus and integer divide.
"Integer divide (%/%) divides a number and displays the largest integer that the number can be divided into"
## [1] "Integer divide (%/%) divides a number and displays the largest integer that the number can be divided into"
10%/%3 #integer divide
## [1] 3
10/3
## [1] 3.333
20%/%7
## [1] 2
20/7 #rounds down
## [1] 2.857
"Modulus (%%) displays the number remaining after an integer division"
## [1] "Modulus (%%) displays the number remaining after an integer division"
10%%3 # 10/3 = 3, remainder 1
## [1] 1
20%%7 #6 remains after dividing 20/7
## [1] 6
# 1.9. In mathematics, a logarithm (to base b) of a number x is written
# logb(x) and equals the exponent y that satisfies x = by. In other words, y
# = logb(x) is equivalent to x = by In R, the log function is to the base e.
# Implement the following R code and study the graph:
curve(log(x), 0, 6)
abline(v = c(1, exp(1)), h = c(0, 1), lty = 2)
# What kind of generalizations can you make about the natural logarithm and
# its base—the number e?
"log(e)=1, with base e, = 1, but regardless of the base, log(1)=1"
## [1] "log(e)=1, with base e, = 1, but regardless of the base, log(1)=1"
# 1.10. Risk (R) is a probability bounded between 0 and 1. Odds is the
# following transformation of R: Odds =R/1−R Use the following code to plot
# the odds:
curve(x/(1 - x), 0, 1)
# Now, use the following code to plot the log(odds):
curve(log(x/(1 - x)), 0, 1)
# What kind of generalizations can you make about the log(odds) as a
# transformation of risk?
"the log odds transformation is more proportionate to risk, so it makes more sense for interpretation of odds, with respect to risk."
## [1] "the log odds transformation is more proportionate to risk, so it makes more sense for interpretation of odds, with respect to risk."
# 1.11. Use the data in Table 1.6 on the next page. Assume one is
# HIV-negative. If the probability of infection per act is p, then the
# probability of not getting infected per act is (1− p). The probability of
# not getting infected after 2 consecutive acts is (1−p)^2, and after 3
# consecutive acts is (1−p)^3. Therefore, the probability of not getting
# infected after n consecutive acts is (1− p)^n, and the probability of
# getting infected after n consecutive acts is 1−(1− p)^n. For each
# non-blood transfusion transmission probability (per act risk) in Table
# 1.6, calculate the cumulative risk of being infected after one year (365
# days) if one carries out the same act once daily for one year with an
# HIV-infected partner. Do these cumulative risks make intuitive sense? Why
# or why not?
# probability of infection per act = p;
n <- 365
act <- c("BT", "IDU", "RAI", "PNS", "RPVI", "IAI", "IPVI", "ROI", "IOI")
pptt <- c(9000, 67, 50, 30, 10, 6.5, 5, 1, 0.5) #risk per 10K act
ppa <- (pptt/10000)
NoInf <- (1 - ppa)^n # p of no infection after 1 act/day for a year
InfR <- 1 - NoInf
data.frame((InfR), row.names = act)
## X.InfR.
## BT 1.00000
## IDU 0.91403
## RAI 0.83952
## PNS 0.66601
## RPVI 0.30593
## IAI 0.21127
## IPVI 0.16685
## ROI 0.03584
## IOI 0.01808
"These risks make sense as the most 'sero-invasive' acts are the riskiest (ie, whole blood transfusions, injected drugs, and receptive anal intercourse), whereas the least invasive are the least risky."
## [1] "These risks make sense as the most 'sero-invasive' acts are the riskiest (ie, whole blood transfusions, injected drugs, and receptive anal intercourse), whereas the least invasive are the least risky."
# 1.12. The source function in R is used to “source” (read in) ASCII text
# files. Take a group of R commands that worked from a previous problem
# above and paste them into an ASCII text file and save it with the name
# job01.R. Then from R command line, source the file. Here is how it looked
# on my Linux computer running R: >
# source('/home/tja/Documents/courses/ph251d/jobs/job01.R')
getwd()
## [1] "/Users/kelsi/Dropbox/PH251D"
source("HIV-job.R")
# Describe what happened. Now, set echo option to TRUE. >
# source('/home/tja/Documents/courses/ph251d/jobs/job01.R', echo = TRUE)
"The variables that were created are in my work space, but since I didn't 'name' the data frame, that does not appear"
## [1] "The variables that were created are in my work space, but since I didn't 'name' the data frame, that does not appear"
source("HIV-job.R", echo = TRUE)
##
## > n <- 365
##
## > act <- c("BT", "IDU", "RAI", "PNS", "RPVI", "IAI",
## + "IPVI", "ROI", "IOI")
##
## > pptt <- c(9000, 67, 50, 30, 10, 6.5, 5, 1, 0.5)
##
## > ppa <- (pptt/10000)
##
## > NoInf <- (1 - ppa)^n
##
## > InfR <- 1 - NoInf
##
## > rates <- data.frame((InfR), row.names = act)
##
## > rates
## X.InfR.
## BT 1.00000
## IDU 0.91403
## RAI 0.83952
## PNS 0.66601
## RPVI 0.30593
## IAI 0.21127
## IPVI 0.16685
## ROI 0.03584
## IOI 0.01808
# Describe what happened. To improve your understanding read the help file
# on the source function.
"I can see the output as if I manually ran each line of the script."
## [1] "I can see the output as if I manually ran each line of the script."
# 1.13. Now run the source again (without and with echo = TRUE) but each
# time create a log file using the sink function. Create two log files:
# job01.log1a and job01.log1b. >
# sink('/home/tja/Documents/courses/ph251d/jobs/job01.log1a') >
# source('/home/tja/Documents/courses/ph251d/jobs/job01.R') > sink() #closes
# connection > sink('/home/tja/Documents/courses/ph251d/jobs/job01.log1b') >
# source('/home/tja/Documents/courses/ph251d/jobs/job01.R', echo = TRUE) >
# sink() #closes connection
sink("HIV-job.log1a")
source("HIV-job.R")
sink()
sink("HIV-job.log1b")
source("HIV-job.R", echo = TRUE)
##
## > n <- 365
##
## > act <- c("BT", "IDU", "RAI", "PNS", "RPVI", "IAI",
## + "IPVI", "ROI", "IOI")
##
## > pptt <- c(9000, 67, 50, 30, 10, 6.5, 5, 1, 0.5)
##
## > ppa <- (pptt/10000)
##
## > NoInf <- (1 - ppa)^n
##
## > InfR <- 1 - NoInf
##
## > rates <- data.frame((InfR), row.names = act)
##
## > rates
## X.InfR.
## BT 1.00000
## IDU 0.91403
## RAI 0.83952
## PNS 0.66601
## RPVI 0.30593
## IAI 0.21127
## IPVI 0.16685
## ROI 0.03584
## IOI 0.01808
sink()
# Examine the log files and describe what happened.
"It made two log files, but only log1b prints the output. log1a is empty."
## [1] "It made two log files, but only log1b prints the output. log1a is empty."
# 1.14. Create a new job file (job02.R) with the following code:
n <- 365
per.act.risk <- c(0.5, 1, 5, 6.5, 10, 30, 50, 67)/10000
risks <- 1 - (1 - per.act.risk)^n
show(risks)
## [1] 0.01808 0.03584 0.16685 0.21127 0.30593 0.66601 0.83952 0.91403
# Source this file at the R command line and describes what happens.
"The 'show' command had the output of risks printed."
## [1] "The 'show' command had the output of risks printed."
# Applied