Heart Rate Data

Import and wrangle heart rate data

library(tidyverse)
# Read the heart rate data from disk.
df <- read.csv("heartrate1000-1.csv")

# Look at the raw 0/1 sex codes as a factor before any labels are attached.
df[["sex"]] <- as.factor(df[["sex"]])
summary(df[["sex"]])
##   0   1 
## 502 498

# Replace the numeric codes of each categorical column with readable labels
# (the i-th level is paired with the i-th label).
df[["sex"]] <- factor(
  df[["sex"]],
  levels = c(0, 1),
  labels = c("male", "female")
)
df[["occupation"]] <- factor(
  df[["occupation"]],
  levels = c(0, 1, 2, 3, 4, 5),
  labels = c(
    "attorney", "nurse", "physician",
    "accountant", "teacher", "dining host"
  )
)
df[["fitness"]] <- factor(
  df[["fitness"]],
  levels = c(0, 1),
  labels = c("sedentary", "trained")
)

# Long format: one row per original row and heart rate column, with the
# source column name stored in `time` and its value in `heartrate`.
df_long <- pivot_longer(
  df,
  cols = c(hr_t0, hr_week1, hr_week2, hr_week3, hr_week4),
  names_to = "time",
  values_to = "heartrate"
)

Descriptive Statistics

Question 1

# Count of rows in df for each occupation level.
summary(df[["occupation"]])
##    attorney       nurse   physician  accountant     teacher dining host 
##         164         163         157         167         173         176

tapply review

Question 2

# Mean of hr_t0 within each occupation.
with(df, tapply(hr_t0, occupation, mean))
##    attorney       nurse   physician  accountant     teacher dining host 
##    61.57165    64.88497    64.02548    61.50479    66.28439    60.55455
# Median of hr_week4 within each occupation.
with(df, tapply(hr_week4, occupation, median))
##    attorney       nurse   physician  accountant     teacher dining host 
##      55.400      58.600      58.200      56.000      59.400      53.525

Question 3

# Count of rows in df for each fitness level.
summary(df[["fitness"]])
## sedentary   trained 
##       494       506
# Minimum heart rate per fitness level, one call per measurement column.
with(df, tapply(hr_t0, fitness, min))
## sedentary   trained 
##     56.05     43.55
with(df, tapply(hr_week1, fitness, min))
## sedentary   trained 
##     54.45     43.55
with(df, tapply(hr_week2, fitness, min))
## sedentary   trained 
##     52.45     42.55
with(df, tapply(hr_week3, fitness, min))
## sedentary   trained 
##     49.20     41.55
with(df, tapply(hr_week4, fitness, min))
## sedentary   trained 
##     47.20     38.55

Subsetting Data

Question 4

# Long-format rows for males with a heart rate above 65.
df_long_m65 <- df_long %>%
  filter(sex == "male" & heartrate > 65)
nrow(df_long_m65)
## [1] 786
# Long-format rows for females who are attorneys or physicians.
df_long_sub1 <- df_long %>%
  filter(
    sex == "female" &
      (occupation == "attorney" | occupation == "physician")
  )
nrow(df_long_sub1)
## [1] 780
# Long-format rows for males who are nurses or physicians.
df_long_sub2 <- df_long %>%
  filter(
    sex == "male" &
      (occupation == "nurse" | occupation == "physician")
  )
nrow(df_long_sub2)
## [1] 835

Question 5

# Wide-format rows for trained females whose hr_t0 is below 55.
subset_df <- filter(
  df,
  fitness == "trained" & sex == "female" & hr_t0 < 55
)
nrow(subset_df)
## [1] 56

Question 6

# Week-4 rows for male teachers who either have a heart rate below 60 or are
# in the trained fitness group (the comma-separated conditions are ANDed).
df_long_sub3 <- filter(
  df_long,
  time == "hr_week4",
  sex == "male",
  occupation == "teacher",
  heartrate < 60 | fitness == "trained"
)
# Mean heart rate of that subset, reported for the teacher level only.
with(df_long_sub3, tapply(heartrate, occupation, mean))["teacher"]
##  teacher 
## 57.90976

Cancer Data

Import and wrangle cancer data

# Read the cancer data from disk.
cancer <- read.csv(file = "cancer-1.csv")

Question 7

# Label the treatment codes: 0 becomes "placebo", 1 becomes "aloe_juice".
cancer$TRT <- factor(
  cancer$TRT,
  levels = c(0, 1),
  labels = c("placebo", "aloe_juice")
)
# Long format: one row per original row and TOTALC* column, with the source
# column name stored in `time` and its value in `oral_condition`.
cancer_long <- pivot_longer(
  cancer,
  cols = c(TOTALCIN, TOTALCW2, TOTALCW4, TOTALCW6),
  names_to = "time",
  values_to = "oral_condition"
)
# Aloe-juice rows with AGE above 50 and oral_condition between 5 and 15
# inclusive.
cancer_sub <- filter(
  cancer_long,
  TRT == "aloe_juice" & AGE > 50 &
    oral_condition >= 5 & oral_condition <= 15
)
nrow(cancer_sub)
## [1] 28

Piping

# The same mean computed two ways: piped into mean() ...
df_long[["heartrate"]] %>% mean()
## [1] 60.3377
# ... and passed to mean() as an ordinary argument.
mean(df_long[["heartrate"]])
## [1] 60.3377

Question 8

# Rows of cancer_long in the aloe_juice arm with AGE >= 30 and STAGE > 0,
# restricted to the four columns of interest, with incomplete rows dropped.
#
# TRT has already been converted to a labelled factor ("placebo" /
# "aloe_juice"), so it has to be matched by label. Comparing that factor with
# the number 1 tests against the level names, is never TRUE, and silently
# produced an empty result.
result <- cancer_long %>%
  select(TRT, AGE, STAGE, oral_condition) %>%
  filter(TRT == "aloe_juice", AGE >= 30, STAGE > 0) %>%
  drop_na()
nrow(result)
# NOTE: the previously recorded output ("[1] 0") came from the TRT == 1
# comparison; re-run this chunk to print the corrected row count.