library(tidyverse)
# Read the heart-rate data set from CSV into a data frame.
df <- read.csv(file = "heartrate1000-1.csv")

# Treat sex as categorical and show how many rows carry each raw code.
df[["sex"]] <- as.factor(df[["sex"]])
summary(df[["sex"]])
## 0 1
## 502 498

# Attach readable labels to the numeric codes of the categorical columns.
# sex: 0 -> "male", 1 -> "female"
df[["sex"]] <- factor(
  df[["sex"]],
  levels = 0:1,
  labels = c("male", "female")
)

# occupation: codes 0..5 map, in order, to the six labels below.
df[["occupation"]] <- factor(
  df[["occupation"]],
  levels = 0:5,
  labels = c(
    "attorney", "nurse", "physician",
    "accountant", "teacher", "dining host"
  )
)

# fitness: 0 -> "sedentary", 1 -> "trained"
df[["fitness"]] <- factor(
  df[["fitness"]],
  levels = 0:1,
  labels = c("sedentary", "trained")
)
# Reshape to long format: the five hr_* columns are stacked so that each row
# holds one measurement, with the source column name stored in `time` and the
# measured value stored in `heartrate`.
df_long <- pivot_longer(
  df,
  cols = c(hr_t0, hr_week1, hr_week2, hr_week3, hr_week4),
  names_to = "time",
  values_to = "heartrate"
)
# Row counts per occupation level.
summary(df[["occupation"]])
## attorney nurse physician accountant teacher dining host
## 164 163 157 167 173 176

# Mean of hr_t0 within each occupation.
tapply(df[["hr_t0"]], df[["occupation"]], FUN = mean)
## attorney nurse physician accountant teacher dining host
## 61.57165 64.88497 64.02548 61.50479 66.28439 60.55455

# Median of hr_week4 within each occupation.
tapply(df[["hr_week4"]], df[["occupation"]], FUN = median)
## attorney nurse physician accountant teacher dining host
## 55.400 58.600 58.200 56.000 59.400 53.525

# Row counts per fitness level.
summary(df[["fitness"]])
## sedentary trained
## 494 506

# Minimum heart rate per fitness level, one call per measurement column
# (hr_t0, then hr_week1 through hr_week4).
tapply(df[["hr_t0"]], df[["fitness"]], FUN = min)
## sedentary trained
## 56.05 43.55
tapply(df[["hr_week1"]], df[["fitness"]], FUN = min)
## sedentary trained
## 54.45 43.55
tapply(df[["hr_week2"]], df[["fitness"]], FUN = min)
## sedentary trained
## 52.45 42.55
tapply(df[["hr_week3"]], df[["fitness"]], FUN = min)
## sedentary trained
## 49.20 41.55
tapply(df[["hr_week4"]], df[["fitness"]], FUN = min)
## sedentary trained
## 47.20 38.55
# Long-format rows for males with a heart rate above 65.
df_long_m65 <- df_long %>%
  filter(sex == "male" & heartrate > 65)
nrow(df_long_m65)
## [1] 786

# Long-format rows for females who are attorneys or physicians.
df_long_sub1 <- df_long %>%
  filter(sex == "female" & occupation %in% c("attorney", "physician"))
nrow(df_long_sub1)
## [1] 780

# Long-format rows for males who are nurses or physicians.
df_long_sub2 <- df_long %>%
  filter(sex == "male" & occupation %in% c("nurse", "physician"))
nrow(df_long_sub2)
## [1] 835

# Wide-format rows for trained females whose hr_t0 is below 55.
subset_df <- filter(
  df,
  fitness == "trained" & sex == "female" & hr_t0 < 55
)
nrow(subset_df)
## [1] 56
# Week-4 measurements for male teachers who either are trained or have a
# heart rate below 60 (the two alternatives are OR-ed; every other condition
# must hold as well).
df_long_sub3 <- filter(
  df_long,
  occupation == "teacher",
  sex == "male",
  time == "hr_week4",
  fitness == "trained" | heartrate < 60
)

# Mean heart rate of that subset, reported for the "teacher" level only.
tapply(
  df_long_sub3[["heartrate"]],
  df_long_sub3[["occupation"]],
  FUN = mean
)["teacher"]
## teacher
## 57.90976
# Read the cancer data set and recode TRT: 0 -> "placebo", 1 -> "aloe_juice".
cancer <- read.csv(file = "cancer-1.csv") %>%
  mutate(
    TRT = factor(TRT, levels = 0:1, labels = c("placebo", "aloe_juice"))
  )

# Stack the four TOTALC* columns into long format: the source column name goes
# to `time`, the value to `oral_condition`.
cancer_long <- pivot_longer(
  cancer,
  cols = c(TOTALCIN, TOTALCW2, TOTALCW4, TOTALCW6),
  names_to = "time",
  values_to = "oral_condition"
)

# Aloe-juice rows with AGE above 50 and oral_condition within [5, 15].
cancer_sub <- filter(
  cancer_long,
  TRT == "aloe_juice" & AGE > 50 &
    oral_condition >= 5 & oral_condition <= 15
)
nrow(cancer_sub)
## [1] 28
# Overall mean of the long-format heart-rate values, computed twice to show
# that the piped call and the direct call give the same result.
df_long[["heartrate"]] %>% mean()
## [1] 60.3377
mean(x = df_long[["heartrate"]])
## [1] 60.3377
# Aloe-juice rows of the long cancer data with AGE >= 30 and STAGE > 0,
# reduced to four columns, with rows containing any NA removed.
#
# TRT is a factor labelled "placebo"/"aloe_juice" at this point (it was
# recoded from 0/1 when `cancer` was loaded). A factor is compared by its
# labels, so the former condition `TRT == 1` matched no row and always gave an
# empty result. Comparing against the label selects the treatment arm that the
# raw code 1 stood for.
result <- cancer_long %>%
  select(TRT, AGE, STAGE, oral_condition) %>%
  filter(TRT == "aloe_juice", AGE >= 30, STAGE > 0) %>%
  drop_na()
nrow(result)
# NOTE(review): the output recorded before this fix was "[1] 0", an artifact
# of the `TRT == 1` comparison; re-run to record the corrected row count.