# Kelsi Perttula PH 251D September 16, 2013

# Problems 1.1. If you have not done so already, install R on your personal
# computer. What is the R workspace file on your operating systems? What is
# the file path to your R workspace file? What is the name of this workspace
# file?
"/User/kelsi/Dropbox/PH251D/HW1.R is the path to my workspace, HW1.R"
## [1] "/User/kelsi/Dropbox/PH251D/HW1.R is the path to my workspace, HW1.R"
save.image()  #is how I save the workspace
# R file- HW1.R
getwd()  #is how you know what directory your workspace was saved in
## [1] "/Users/kelsi/Dropbox/PH251D"

## 

# 1.2. By default, which R packages come already loaded? What are the file
# paths to the default R packages?

searchpaths()  #displays the paths of the auto-lodaed packages
##  [1] ".GlobalEnv"                                                              
##  [2] "/Library/Frameworks/R.framework/Versions/3.0/Resources/library/knitr"    
##  [3] "/Library/Frameworks/R.framework/Versions/3.0/Resources/library/stats"    
##  [4] "/Library/Frameworks/R.framework/Versions/3.0/Resources/library/graphics" 
##  [5] "/Library/Frameworks/R.framework/Versions/3.0/Resources/library/grDevices"
##  [6] "/Library/Frameworks/R.framework/Versions/3.0/Resources/library/utils"    
##  [7] "/Library/Frameworks/R.framework/Versions/3.0/Resources/library/datasets" 
##  [8] "/Library/Frameworks/R.framework/Versions/3.0/Resources/library/methods"  
##  [9] "Autoloads"                                                               
## [10] "/Library/Frameworks/R.framework/Resources/library/base"

# 1.3. List all the object in the current workspace. If there are none,
# create some data objects. Using one expression, remove all the objects in
# the current workspace.

x <- 5
y <- c(4, 6, 7, 2)

ls()
## [1] "x" "y"
rm(list = ls())
ls()
## character(0)

# 1.4. One inch equals 2.54 centimeters. Correct the following R code and
# create a conversion table.

inches <- 1:12
centimeters <- inches * 2.54  #switching from '/' to '*' fixed the table
z = cbind(inches, centimeters)
z
##       inches centimeters
##  [1,]      1        2.54
##  [2,]      2        5.08
##  [3,]      3        7.62
##  [4,]      4       10.16
##  [5,]      5       12.70
##  [6,]      6       15.24
##  [7,]      7       17.78
##  [8,]      8       20.32
##  [9,]      9       22.86
## [10,]     10       25.40
## [11,]     11       27.94
## [12,]     12       30.48


# 1.5. To convert between temperatures in degrees Celsius (C) and Farenheit
# (F), we use the following conversion formulas: C = (F −32)(5/9) F = (9/5)C
# +32

# At standard temperature and pressure, the freezing and boiling points of
# water are 0 and 100 degrees Celsius, respectively. What are the freezing
# and boiling points of water in degrees Fahrenheit?

mpF <- (9/5) * (0) + 32
mpF
## [1] 32

bpF <- (9/5) * 100 + 32
bpF
## [1] 212


# 1.6. For the Celsius temperatures 0, 5, 10, 15, 20, 25, ..., 80, 85, 90,
# 95, 100, construct a conversion table that displays the corresponding
# Fahrenheit temperatures.  Hint: to create the sequence of Celsius
# temperatures use seq(0, 100, 5).

Ctemp <- seq(0, 100, 5)
Ftemp <- Ctemp * (9/5) + 32
CFConversionTable <- cbind(Ctemp, Ftemp)
CFConversionTable
##       Ctemp Ftemp
##  [1,]     0    32
##  [2,]     5    41
##  [3,]    10    50
##  [4,]    15    59
##  [5,]    20    68
##  [6,]    25    77
##  [7,]    30    86
##  [8,]    35    95
##  [9,]    40   104
## [10,]    45   113
## [11,]    50   122
## [12,]    55   131
## [13,]    60   140
## [14,]    65   149
## [15,]    70   158
## [16,]    75   167
## [17,]    80   176
## [18,]    85   185
## [19,]    90   194
## [20,]    95   203
## [21,]   100   212

# 1.7. BMI is a reliable indicator of total body fat, which is related to
# the risk of disease and death. The score is valid for both men and women
# but it does have some limits. BMI does have some limitations: it may
# overestimate body fat in athletes and others who have a muscular build, it
# may underestimate body fat in older persons and others who have lost
# muscle mass.  Table 1.5 Body mass index classification BMI Classification
# < 18.5 Underweight [18.5,25) Normal weight [25,30) Overweight \025 30 Obesity
# Applied Epidemiology Using R 5-Sep-2013 Body Mass Index (BMI) is
# calculated from your weight in kilograms and height in meters: BMI =kg/m^2
# pretend person:
lb = 125  #pounds
ft = 5.5  # 5'6'
m <- lb/2.2
h <- ft/3.3
m/(h^2)
## [1] 20.45

# 1kg = 2.2lb 1m = 3.3ft Calculate your BMI (don’t report it to us).  1.8.
# Using Table 1.1 on page 8, explain in words, and use R to illustrate, the
# difference between modulus and integer divide.

"Integer divide (%/%) divides a number and displays the largest integer that the number can be divided into"
## [1] "Integer divide (%/%) divides a number and displays the largest integer that the number can be divided into"
10%/%3  #integer divide
## [1] 3
10/3
## [1] 3.333
20%/%7
## [1] 2
20/7  #rounds down
## [1] 2.857
"Modulus (%%) displays the number remaining after an integer division"
## [1] "Modulus (%%) displays the number remaining after an integer division"
10%%3  # 10/3 = 3, remainder 1
## [1] 1
20%%7  #6 remains after dividing 20/7
## [1] 6

# 1.9. In mathematics, a logarithm (to base b) of a number x is written
# logb(x) and equals the exponent y that satisfies x = by. In other words, y
# = logb(x) is equivalent to x = by In R, the log function is to the base e.
# Implement the following R code and study the graph:

curve(log(x), 0, 6)
abline(v = c(1, exp(1)), h = c(0, 1), lty = 2)

plot of chunk unnamed-chunk-1


# What kind of generalizations can you make about the natural logarithm and
# its base—the number e?

"log(e)=1, with base e, = 1, but regardless of the base, log(1)=1"
## [1] "log(e)=1, with base e, = 1, but regardless of the base, log(1)=1"


# 1.10. Risk (R) is a probability bounded between 0 and 1. Odds is the
# following transformation of R: Odds =R/1−R Use the following code to plot
# the odds:

curve(x/(1 - x), 0, 1)

plot of chunk unnamed-chunk-1



# Now, use the following code to plot the log(odds):

curve(log(x/(1 - x)), 0, 1)

plot of chunk unnamed-chunk-1


# What kind of generalizations can you make about the log(odds) as a
# transformation of risk?

"the log odds transformation is more proportionate to risk, so it makes more sense for interpretation of odds, with respect to risk."
## [1] "the log odds transformation is more proportionate to risk, so it makes more sense for interpretation of odds, with respect to risk."

# 1.11. Use the data in Table 1.6 on the next page. Assume one is
# HIV-negative. If the probability of infection per act is p, then the
# probability of not getting infected per act is (1− p). The probability of
# not getting infected after 2 consecutive acts is (1−p)^2, and after 3
# consecutive acts is (1−p)^3. Therefore, the probability of not getting
# infected after n consecutive acts is (1− p)^n, and the probability of
# getting infected after n consecutive acts is 1−(1− p)^n. For each
# non-blood transfusion transmission probability (per act risk) in Table
# 1.6, calculate the cumulative risk of being infected after one year (365
# days) if one carries out the same act once daily for one year with an
# HIV-infected partner. Do these cumulative risks make intuitive sense? Why
# or why not?

# probability of infection per act = p;
n <- 365
act <- c("BT", "IDU", "RAI", "PNS", "RPVI", "IAI", "IPVI", "ROI", "IOI")
pptt <- c(9000, 67, 50, 30, 10, 6.5, 5, 1, 0.5)  #risk per 10K act
ppa <- (pptt/10000)
NoInf <- (1 - ppa)^n  # p of no infection after 1 act/day for a year

InfR <- 1 - NoInf
data.frame((InfR), row.names = act)
##      X.InfR.
## BT   1.00000
## IDU  0.91403
## RAI  0.83952
## PNS  0.66601
## RPVI 0.30593
## IAI  0.21127
## IPVI 0.16685
## ROI  0.03584
## IOI  0.01808

"These risks make sense as the most 'sero-invasive' acts are the riskiest (ie, whole blood transfusions, injected drugs, and receptive anal intercourse), whereas the least invasive are the least risky."
## [1] "These risks make sense as the most 'sero-invasive' acts are the riskiest (ie, whole blood transfusions, injected drugs, and receptive anal intercourse), whereas the least invasive are the least risky."

# 1.12. The source function in R is used to “source” (read in) ASCII text
# files. Take a group of R commands that worked from a previous problem
# above and paste them into an ASCII text file and save it with the name
# job01.R. Then from R command line, source the file. Here is how it looked
# on my Linux computer running R: >
# source('/home/tja/Documents/courses/ph251d/jobs/job01.R')

getwd()
## [1] "/Users/kelsi/Dropbox/PH251D"
source("HIV-job.R")

# Describe what happened. Now, set echo option to TRUE.  >
# source('/home/tja/Documents/courses/ph251d/jobs/job01.R', echo = TRUE)
"The variables that were created are in my work space, but since I didn't 'name' the data frame, that does not appear"
## [1] "The variables that were created are in my work space, but since I didn't 'name' the data frame, that does not appear"

source("HIV-job.R", echo = TRUE)
## 
## > n <- 365
## 
## > act <- c("BT", "IDU", "RAI", "PNS", "RPVI", "IAI", 
## +     "IPVI", "ROI", "IOI")
## 
## > pptt <- c(9000, 67, 50, 30, 10, 6.5, 5, 1, 0.5)
## 
## > ppa <- (pptt/10000)
## 
## > NoInf <- (1 - ppa)^n
## 
## > InfR <- 1 - NoInf
## 
## > rates <- data.frame((InfR), row.names = act)
## 
## > rates
##      X.InfR.
## BT   1.00000
## IDU  0.91403
## RAI  0.83952
## PNS  0.66601
## RPVI 0.30593
## IAI  0.21127
## IPVI 0.16685
## ROI  0.03584
## IOI  0.01808

# Describe what happened. To improve your understanding read the help file
# on the source function.

"I can see the output as if I manually ran each line of the script."
## [1] "I can see the output as if I manually ran each line of the script."

# 1.13. Now run the source again (without and with echo = TRUE) but each
# time create a log file using the sink function. Create two log files:
# job01.log1a and job01.log1b.  >
# sink('/home/tja/Documents/courses/ph251d/jobs/job01.log1a') >
# source('/home/tja/Documents/courses/ph251d/jobs/job01.R') > sink() #closes
# connection > sink('/home/tja/Documents/courses/ph251d/jobs/job01.log1b') >
# source('/home/tja/Documents/courses/ph251d/jobs/job01.R', echo = TRUE) >
# sink() #closes connection

sink("HIV-job.log1a")
source("HIV-job.R")
sink()
sink("HIV-job.log1b")
source("HIV-job.R", echo = TRUE)
## 
## > n <- 365
## 
## > act <- c("BT", "IDU", "RAI", "PNS", "RPVI", "IAI", 
## +     "IPVI", "ROI", "IOI")
## 
## > pptt <- c(9000, 67, 50, 30, 10, 6.5, 5, 1, 0.5)
## 
## > ppa <- (pptt/10000)
## 
## > NoInf <- (1 - ppa)^n
## 
## > InfR <- 1 - NoInf
## 
## > rates <- data.frame((InfR), row.names = act)
## 
## > rates
##      X.InfR.
## BT   1.00000
## IDU  0.91403
## RAI  0.83952
## PNS  0.66601
## RPVI 0.30593
## IAI  0.21127
## IPVI 0.16685
## ROI  0.03584
## IOI  0.01808
sink()


# Examine the log files and describe what happened.
"It made two log files, but only log1b prints the output. log1a is empty."
## [1] "It made two log files, but only log1b prints the output. log1a is empty."

# 1.14. Create a new job file (job02.R) with the following code:
n <- 365
per.act.risk <- c(0.5, 1, 5, 6.5, 10, 30, 50, 67)/10000
risks <- 1 - (1 - per.act.risk)^n
show(risks)
## [1] 0.01808 0.03584 0.16685 0.21127 0.30593 0.66601 0.83952 0.91403
# Source this file at the R command line and describes what happens.

"The 'show' command had the output of risks printed."
## [1] "The 'show' command had the output of risks printed."
# Applied