library(dplyr)
library(ggplot2)
library(knitr)
#' Recode a 1/0 indicator as a Yes/No factor.
#'
#' The input is coerced to a factor and the levels "1" and "0" are relabelled
#' "Yes" and "No" (in that level order). Missing values stay missing.
#'
#' @param x A vector coded 1/0 (numeric, character, or factor).
#' @return A factor with the levels "1"/"0" relabelled as "Yes"/"No".
onezero <- function(x) {
  # Return the recoded factor directly: ending the function on an assignment
  # would hand the value back invisibly.
  recode_factor(as.factor(x), "1" = "Yes", "0" = "No")
}
#' Recode a 0/1 questionnaire item as a True/False factor.
#'
#' The input is coerced to a factor and the levels "0" and "1" are relabelled
#' "True" and "False" (in that level order). Missing values stay missing.
#'
#' NOTE(review): here 0 means "True" and 1 means "False", the reverse of the
#' usual convention -- presumably this follows the data-entry coding; confirm
#' against the data dictionary.
#'
#' @param x A vector coded 0/1 (numeric, character, or factor).
#' @return A factor with the levels "0"/"1" relabelled as "True"/"False".
truefalse <- function(x) {
  # Return the recoded factor directly: ending the function on an assignment
  # would hand the value back invisibly.
  recode_factor(as.factor(x), "0" = "True", "1" = "False")
}
#' Print a univariate plot and return a summary table for one variable.
#'
#' Numeric input (double or integer) gets a histogram with mean and median
#' reference lines plus a one-row table (Mean, Median, SD, nMissing,
#' pMissing). Factor input gets a bar chart of the non-missing values plus a
#' per-level table (n, percent) in which missing values form their own `NA`
#' row and levels with no observations are omitted.
#'
#' All statistics are computed from `var` itself, so the function works for a
#' column of any data frame, not only the global `tb0`.
#'
#' @param var A numeric vector or a factor.
#' @param ... Exactly one value naming the variable. It is used as the x-axis
#'   label and, for factors, as the header of the first table column.
#' @return The table built by `knitr::kable()`. The plot is printed as a side
#'   effect.
univariate <- function(var, ...) {
  label_args <- list(...)
  if (length(label_args) != 1L) {
    stop(
      "univariate() needs exactly one variable name after `var`.",
      call. = FALSE
    )
  }
  label <- label_args[[1L]]

  # Framing shared by both plot types.
  frame_theme <- theme(
    plot.background = element_rect(fill = "white", color = "black", size = 1),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )

  if (is.numeric(var)) {
    var_mean <- mean(var, na.rm = TRUE)
    var_median <- median(var, na.rm = TRUE)
    n_missing <- sum(is.na(var))

    a <- data.frame(
      Mean = var_mean,
      Median = var_median,
      SD = sd(var, na.rm = TRUE),
      nMissing = n_missing,
      # Denominator is the length of the vector that was actually passed in.
      pMissing = round(100 * n_missing / length(var), digits = 2)
    )

    b <- ggplot() +
      geom_histogram(aes(var), col = "black", fill = "light blue") +
      # Mapping colour to a constant string inside aes() creates the legend
      # entries that tell the two reference lines apart.
      geom_vline(aes(xintercept = var_mean, colour = "Mean"), size = 1) +
      geom_vline(aes(xintercept = var_median, colour = "Median"), size = 1) +
      xlab(label) +
      theme_minimal() +
      frame_theme
  } else if (is.factor(var)) {
    counts <- table(var, useNA = "ifany")
    level_names <- names(counts)
    level_n <- as.vector(counts)
    observed <- level_n > 0

    a <- data.frame(
      level = level_names[observed],
      n = level_n[observed],
      percent = round(100 * level_n[observed] / sum(level_n), digits = 2),
      stringsAsFactors = FALSE
    )
    names(a)[1L] <- as.character(label)

    present <- var[!is.na(var)]
    b <- ggplot() +
      geom_bar(aes(present), col = "black", fill = "light blue") +
      xlab(label) +
      ylab("Count") +
      theme_minimal() +
      frame_theme
  } else {
    stop(
      "univariate() supports numeric and factor variables only; got class: ",
      paste(class(var), collapse = "/"),
      call. = FALSE
    )
  }

  print(b)
  # kable() selects the output format through `format`; an unknown `type`
  # argument would be silently ignored.
  kable(a, format = "markdown")
}
# Read the two raw CSV exports from the project data folder. A lone "." in
# either file marks a missing value.
# NOTE(review): the working directory is a hard-coded, machine-specific path.
setwd("C:/Users/jkempke/Box Sync/PFT_St_George_Project/data")

tb0 <- read.csv(
  "stgeorge_data.csv",
  na.strings = ".",
  stringsAsFactors = FALSE
)

st.george <- read.csv(
  "SGRQ scores.csv",
  na.strings = ".",
  stringsAsFactors = FALSE
)

# Lower-case the score column names so later code can refer to them uniformly.
names(st.george) <- tolower(names(st.george))
# Label a symptom-frequency questionnaire item coded 0-4 (shared by cough,
# phlegm, shortness and wheezing).
symptom_freq <- function(x) {
  recode_factor(
    x,
    "0" = "Most Days/Week",
    "1" = "Several Days/Week",
    "2" = "Few Days/Month",
    "3" = "Only w/ Chest Infections",
    "4" = "None"
  )
}

# Build the analysis table: keep the columns of interest, parse the date
# strings (month/day/year), and turn the numeric codes into labelled factors.
tb0 <- tb0 %>%
  transmute(
    Obs = Obs,
    study_id = as.character(study_id),
    age = age,
    sex = recode_factor(sex, "0" = "Female", "1" = "Male"),
    tob = onezero(tob),
    etoh = onezero(etoh),
    ivdu = onezero(ivdu),
    diabetes = onezero(comorbid___0),
    liver = onezero(comorbid___4),
    hep_b = onezero(comorbid___6),
    tbdiag = as.Date(tbdiag, format = "%m/%d/%Y"),
    txstart = as.Date(txstart, format = "%m/%d/%Y"),
    # Coerced to a factor first so that codes outside 0-2 keep their original
    # value as a level instead of becoming NA.
    case_def = recode_factor(
      as.factor(case_def),
      "0" = "New",
      "1" = "PrevTx 1st Line",
      "2" = "PrevTx 2nd Line"
    ),
    multi = onezero(multi),
    bilat = onezero(bilat),
    cavit = onezero(cavit),
    bicavit = onezero(bicavit),
    cav_pres_surg = onezero(cav_pres_surg),
    strep = onezero(strep),
    inh = onezero(inh),
    rif = onezero(rif),
    etham = onezero(etham),
    ethion = onezero(ethion),
    ofl = onezero(ofl),
    pas = onezero(pas),
    cap = onezero(cap),
    kan = onezero(kan),
    mdr = onezero(mdr),
    xdr = onezero(xdr),
    date_cult_conv = as.Date(date_cult_conv, format = "%m/%d/%Y"),
    indication = as.factor(indication),
    vital = vital_,
    fev = fev,
    height = height,
    weight = weight,
    bmi = bmi,
    mdr_date = as.Date(mdr_date, format = "%m/%d/%Y"),
    surgtype = recode_factor(
      surgtype,
      "0" = "Pneumonectomy",
      "1" = "Lobectomy",
      "2" = "Segmentectomy",
      "3" = "Wedge",
      "4" = "Other"
    ),
    majorcom = recode_factor(
      majorcom,
      "0" = "BP fistula",
      "1" = "Empyema",
      "2" = "PE",
      "3" = "Hemorrhage",
      "4" = "Pneumonia",
      "5" = "Respiratory Insufficiency"
    ),
    outcome = recode_factor(
      outcome,
      "0" = "Failure",
      "1" = "Cured",
      "2" = "Completed",
      "3" = "Default",
      "4" = "Died",
      "5" = "Transfered Out"
    ),
    date_outcome = as.Date(date_outcome, format = "%m/%d/%Y"),
    pft_date = as.Date(pft_date, format = "%m/%d/%Y"),
    pft_weight = pft_weight,
    fvc = fvc,
    fvc_per = fvc_per,
    fev1 = fev1,
    fev1_per = fev1_per,
    ratio = ratio,
    pef = pef,
    ratio_2 = ratio_2,
    # NOTE(review): the "5" label looks carried over from `outcome`; confirm
    # whether code 5 can occur for this item.
    health = recode_factor(
      health,
      "0" = "Very Good",
      "1" = "Good",
      "2" = "Fair",
      "3" = "Poor",
      "4" = "Very Poor",
      "5" = "Transfer Out"
    ),
    cough = symptom_freq(cough),
    phlegm = symptom_freq(phlegm),
    shortness = symptom_freq(shortness),
    wheezing = symptom_freq(wheezing),
    # Five answer options, coded 0-4. Each code must appear exactly once:
    # a repeated key silently overrides the earlier label and leaves the
    # remaining codes unmatched (NA).
    attacks = recode_factor(
      attacks,
      "0" = ">3",
      "1" = "3",
      "2" = "2",
      "3" = "1",
      "4" = "None"
    ),
    # Four answer options, coded 0-3 (same uniqueness requirement as above).
    attack_length = recode_factor(
      attack_length,
      "0" = ">/= Week",
      "1" = ">/=3days",
      "2" = "1 or 2 days",
      "3" = "<1 day"
    ),
    good_day = recode_factor(
      good_day,
      "0" = "None",
      "1" = "1 or 2",
      "2" = "3 or 4",
      "3" = "Nearly Every day",
      "4" = "Every day"
    ),
    wheeze = onezero(wheeze),
    chest_con = recode_factor(
      chest_con,
      "0" = "Most Important",
      "1" = "Causes quite a lot",
      "2" = "Causes a few",
      "3" = "No Problem"
    ),
    paid_employ = recode_factor(
      paid_employ,
      "0" = "Stop Work",
      "1" = "Interferes or Changed Workd",
      "2" = "Doesn't affect Work"
    ),
    sitting = truefalse(sitting),
    washed = truefalse(washed),
    walking = truefalse(walking),
    walk_outside = truefalse(walk_outside),
    walk_stairs = truefalse(walk_stairs),
    walk_hill = truefalse(walk_hill),
    games = truefalse(games),
    cough_hurt = truefalse(cough_hurt),
    cough_tired = truefalse(cough_tired),
    breath_talk = truefalse(breath_talk),
    breath_bend = truefalse(breath_bend),
    breath_sleep = truefalse(breath_sleep),
    exhausted = truefalse(exhausted),
    embarrass = truefalse(embarrass),
    nuisance = truefalse(nuisance),
    panic = truefalse(panic),
    control_chest = truefalse(control_chest),
    expect_chest = truefalse(expect_chest),
    frail = truefalse(frail),
    exercise_safe = truefalse(exercise_safe),
    much_effort = truefalse(much_effort),
    med_help = truefalse(med_help),
    med_embar = truefalse(med_embar),
    med_se = truefalse(med_se),
    med_inter = truefalse(med_inter),
    time_wash = truefalse(time_wash),
    time_bath = truefalse(time_bath),
    walk_slow = truefalse(walk_slow),
    time_hw = truefalse(time_hw),
    walk_stair = truefalse(walk_stair),
    walk_fast = truefalse(walk_fast),
    breath_stairs = truefalse(breath_stairs),
    breath_garden = truefalse(breath_garden),
    breath_manual = truefalse(breath_manual),
    sports = truefalse(sports),
    recreation = truefalse(recreation),
    shopping = truefalse(shopping),
    housework = truefalse(housework),
    chair = truefalse(chair),
    finale = recode_factor(
      finale,
      "0" = "Does not stop me",
      "1" = "Stops 1 or 2 things",
      "2" = "Stops most things",
      "3" = "Stops everything"
    )
  )
# Prepare the questionnaire scores for merging: discard rows without an id,
# expose the id as a character `study_id` key, and drop the original column.
st.george <- st.george %>%
  filter(!is.na(id)) %>%
  mutate(study_id = as.character(id)) %>%
  dplyr::select(-id)

# Attach the scores to the analysis table; every row of tb0 is kept.
tb1 <- tb0 %>%
  left_join(st.george, by = "study_id")
Mean | Median | SD | nMissing | pMissing |
---|---|---|---|---|
33.73907 | 31.1697 | 10.80798 | 0 | 0 |
sex | n | percent |
---|---|---|
Female | 25 | 43.1 |
Male | 33 | 56.9 |
tob | n | percent |
---|---|---|
Yes | 23 | 39.66 |
No | 35 | 60.34 |
etoh | n | percent |
---|---|---|
Yes | 11 | 18.97 |
No | 47 | 81.03 |
ivdu | n | percent |
---|---|---|
No | 58 | 100 |
diabetes | n | percent |
---|---|---|
Yes | 5 | 8.62 |
No | 53 | 91.38 |
liver | n | percent |
---|---|---|
Yes | 4 | 6.9 |
No | 54 | 93.1 |
hep_b | n | percent |
---|---|---|
Yes | 1 | 1.72 |
No | 57 | 98.28 |
## Warning: `geom_bar()` no longer has a `binwidth` parameter. Please use
## `geom_histogram()` instead.
## Warning: `geom_bar()` no longer has a `binwidth` parameter. Please use
## `geom_histogram()` instead.
case_def | n | percent |
---|---|---|
New | 24 | 41.38 |
PrevTx 1st Line | 31 | 53.45 |
PrevTx 2nd Line | 3 | 5.17 |
multi | n | percent |
---|---|---|
Yes | 36 | 62.07 |
No | 22 | 37.93 |
bilat | n | percent |
---|---|---|
Yes | 25 | 43.1 |
No | 33 | 56.9 |
cavit | n | percent |
---|---|---|
Yes | 58 | 100 |
bicavit | n | percent |
---|---|---|
Yes | 13 | 22.41 |
No | 45 | 77.59 |
cav_pres_surg | n | percent |
---|---|---|
Yes | 45 | 77.59 |
No | 13 | 22.41 |
strep | n | percent |
---|---|---|
Yes | 55 | 94.83 |
No | 3 | 5.17 |
inh | n | percent |
---|---|---|
Yes | 57 | 98.28 |
No | 1 | 1.72 |
rif | n | percent |
---|---|---|
Yes | 58 | 100 |
etham | n | percent |
---|---|---|
Yes | 42 | 72.41 |
No | 16 | 27.59 |
ethion | n | percent |
---|---|---|
Yes | 40 | 68.97 |
No | 15 | 25.86 |
NA | 3 | 5.17 |
ofl | n | percent |
---|---|---|
Yes | 15 | 25.86 |
No | 41 | 70.69 |
NA | 2 | 3.45 |
pas | n | percent |
---|---|---|
Yes | 7 | 12.07 |
No | 49 | 84.48 |
NA | 2 | 3.45 |
cap | n | percent |
---|---|---|
Yes | 12 | 20.69 |
No | 42 | 72.41 |
NA | 4 | 6.90 |
kan | n | percent |
---|---|---|
Yes | 19 | 32.76 |
No | 37 | 63.79 |
NA | 2 | 3.45 |
mdr | n | percent |
---|---|---|
Yes | 54 | 93.1 |
No | 4 | 6.9 |
xdr | n | percent |
---|---|---|
Yes | 9 | 15.52 |
No | 49 | 84.48 |
## Warning: `geom_bar()` no longer has a `binwidth` parameter. Please use
## `geom_histogram()` instead.
## Warning: Removed 1 rows containing non-finite values (stat_bin).
## Warning: `geom_bar()` no longer has a `binwidth` parameter. Please use
## `geom_histogram()` instead.
surgtype | n | percent |
---|---|---|
Lobectomy | 10 | 17.24 |
Segmentectomy | 15 | 25.86 |
Other | 2 | 3.45 |
NA | 31 | 53.45 |
majorcom | n | percent |
---|---|---|
BP fistula | 1 | 1.72 |
Hemorrhage | 1 | 1.72 |
NA | 56 | 96.55 |
outcome | n | percent |
---|---|---|
Cured | 41 | 70.69 |
Completed | 15 | 25.86 |
Default | 2 | 3.45 |
## Warning: `geom_bar()` no longer has a `binwidth` parameter. Please use
## `geom_histogram()` instead.
## Warning: Removed 31 rows containing non-finite values (stat_bin).
Mean | Median | SD | nMissing | pMissing |
---|---|---|---|---|
3.296296 | 3 | 0.7753319 | 31 | 53.45 |
## Warning: Removed 31 rows containing non-finite values (stat_bin).
Mean | Median | SD | nMissing | pMissing |
---|---|---|---|---|
2.740741 | 3 | 0.8129998 | 31 | 53.45 |
Mean | Median | SD | nMissing | pMissing |
---|---|---|---|---|
1.721897 | 1.7 | 0.091622 | 0 | 0 |
## Warning: Removed 1 rows containing non-finite values (stat_bin).
Mean | Median | SD | nMissing | pMissing |
---|---|---|---|---|
65.40351 | 62 | 13.40157 | 1 | 1.72 |
## Warning: Removed 1 rows containing non-finite values (stat_bin).
Mean | Median | SD | nMissing | pMissing |
---|---|---|---|---|
22.01819 | 21.22449 | 3.766585 | 1 | 1.72 |
## Warning: `geom_bar()` no longer has a `binwidth` parameter. Please use
## `geom_histogram()` instead.
Mean | Median | SD | nMissing | pMissing |
---|---|---|---|---|
71.55172 | 71.5 | 13.47405 | 0 | 0 |
## Warning: Removed 1 rows containing non-finite values (stat_bin).
Mean | Median | SD | nMissing | pMissing |
---|---|---|---|---|
3795.789 | 3530 | 1208.966 | 1 | 1.72 |
Mean | Median | SD | nMissing | pMissing |
---|---|---|---|---|
89.34483 | 91.5 | 22.21944 | 0 | 0 |
Mean | Median | SD | nMissing | pMissing |
---|---|---|---|---|
2760.862 | 2600 | 1171.599 | 0 | 0 |
Mean | Median | SD | nMissing | pMissing |
---|---|---|---|---|
77.05172 | 80 | 26.74403 | 0 | 0 |
Mean | Median | SD | nMissing | pMissing |
---|---|---|---|---|
70.77586 | 71.5 | 12.35732 | 0 | 0 |
Mean | Median | SD | nMissing | pMissing |
---|---|---|---|---|
71.82759 | 74.5 | 29.67611 | 0 | 0 |
Mean | Median | SD | nMissing | pMissing |
---|---|---|---|---|
57.60345 | 44.5 | 35.08133 | 0 | 0 |
Mean | Median | SD | nMissing | pMissing |
---|---|---|---|---|
35.77155 | 28.575 | 30.79718 | 0 | 0 |
Mean | Median | SD | nMissing | pMissing |
---|---|---|---|---|
29.42897 | 23.28 | 31.25435 | 0 | 0 |
Mean | Median | SD | nMissing | pMissing |
---|---|---|---|---|
15.69362 | 3.725 | 26.12808 | 0 | 0 |
Mean | Median | SD | nMissing | pMissing |
---|---|---|---|---|
23.13552 | 12.245 | 27.09139 | 0 | 0 |
health | n | percent |
---|---|---|
Very Good | 9 | 15.52 |
Good | 23 | 39.66 |
Fair | 17 | 29.31 |
Poor | 6 | 10.34 |
Very Poor | 3 | 5.17 |
cough | n | percent |
---|---|---|
Most Days/Week | 14 | 24.14 |
Several Days/Week | 7 | 12.07 |
Few Days/Month | 4 | 6.90 |
Only w/ Chest Infections | 16 | 27.59 |
None | 17 | 29.31 |
phlegm | n | percent |
---|---|---|
Most Days/Week | 13 | 22.41 |
Several Days/Week | 7 | 12.07 |
Few Days/Month | 2 | 3.45 |
Only w/ Chest Infections | 13 | 22.41 |
None | 23 | 39.66 |
shortness | n | percent |
---|---|---|
Most Days/Week | 9 | 15.52 |
Several Days/Week | 6 | 10.34 |
Few Days/Month | 6 | 10.34 |
Only w/ Chest Infections | 2 | 3.45 |
None | 35 | 60.34 |
wheezing | n | percent |
---|---|---|
Most Days/Week | 5 | 8.62 |
Several Days/Week | 5 | 8.62 |
Few Days/Month | 5 | 8.62 |
Only w/ Chest Infections | 8 | 13.79 |
None | 34 | 58.62 |
NA | 1 | 1.72 |
attacks | n | percent |
---|---|---|
2 | 2 | 3.45 |
1 | 1 | 1.72 |
None | 6 | 10.34 |
NA | 49 | 84.48 |
attack_length | n | percent |
---|---|---|
>/=3days | 6 | 10.34 |
1 or 2 days | 1 | 1.72 |
<1 day | 23 | 39.66 |
NA | 28 | 48.28 |
good_day | n | percent |
---|---|---|
None | 7 | 12.07 |
1 or 2 | 4 | 6.90 |
3 or 4 | 6 | 10.34 |
Nearly Every day | 23 | 39.66 |
Every day | 18 | 31.03 |
wheeze | n | percent |
---|---|---|
Yes | 4 | 6.90 |
No | 11 | 18.97 |
NA | 43 | 74.14 |
chest_con | n | percent |
---|---|---|
Most Important | 8 | 13.79 |
Causes quite a lot | 3 | 5.17 |
Causes a few | 21 | 36.21 |
No Problem | 26 | 44.83 |
paid_employ | n | percent |
---|---|---|
Stop Work | 9 | 15.52 |
Interferes or Changed Workd | 6 | 10.34 |
Doesn’t affect Work | 43 | 74.14 |
sitting | n | percent |
---|---|---|
True | 4 | 6.9 |
False | 54 | 93.1 |
washed | n | percent |
---|---|---|
True | 10 | 17.24 |
False | 48 | 82.76 |
walking | n | percent |
---|---|---|
True | 9 | 15.52 |
False | 49 | 84.48 |
walk_outside | n | percent |
---|---|---|
True | 11 | 18.97 |
False | 47 | 81.03 |
walk_stairs | n | percent |
---|---|---|
True | 30 | 51.72 |
False | 28 | 48.28 |
walk_hill | n | percent |
---|---|---|
True | 21 | 36.21 |
False | 37 | 63.79 |
games | n | percent |
---|---|---|
True | 36 | 62.07 |
False | 22 | 37.93 |
cough_hurt | n | percent |
---|---|---|
True | 10 | 17.24 |
False | 48 | 82.76 |
cough_tired | n | percent |
---|---|---|
True | 8 | 13.79 |
False | 50 | 86.21 |
breath_talk | n | percent |
---|---|---|
True | 9 | 15.52 |
False | 49 | 84.48 |
breath_bend | n | percent |
---|---|---|
True | 7 | 12.07 |
False | 51 | 87.93 |
breath_sleep | n | percent |
---|---|---|
True | 9 | 15.52 |
False | 49 | 84.48 |
exhausted | n | percent |
---|---|---|
True | 16 | 27.59 |
False | 42 | 72.41 |
embarrass | n | percent |
---|---|---|
True | 9 | 15.52 |
False | 49 | 84.48 |
nuisance | n | percent |
---|---|---|
True | 6 | 10.34 |
False | 52 | 89.66 |
panic | n | percent |
---|---|---|
True | 8 | 13.79 |
False | 50 | 86.21 |
control_chest | n | percent |
---|---|---|
True | 9 | 15.52 |
False | 49 | 84.48 |
expect_chest | n | percent |
---|---|---|
True | 10 | 17.24 |
False | 48 | 82.76 |
frail | n | percent |
---|---|---|
True | 10 | 17.24 |
False | 48 | 82.76 |
exercise_safe | n | percent |
---|---|---|
True | 17 | 29.31 |
False | 41 | 70.69 |
much_effort | n | percent |
---|---|---|
True | 11 | 18.97 |
False | 47 | 81.03 |
med_help | n | percent |
---|---|---|
True | 3 | 5.17 |
False | 3 | 5.17 |
NA | 52 | 89.66 |
med_embar | n | percent |
---|---|---|
True | 1 | 1.72 |
False | 5 | 8.62 |
NA | 52 | 89.66 |
med_se | n | percent |
---|---|---|
True | 1 | 1.72 |
False | 5 | 8.62 |
NA | 52 | 89.66 |
med_inter | n | percent |
---|---|---|
True | 1 | 1.72 |
False | 5 | 8.62 |
NA | 52 | 89.66 |
time_wash | n | percent |
---|---|---|
True | 6 | 10.34 |
False | 52 | 89.66 |
time_bath | n | percent |
---|---|---|
True | 7 | 12.07 |
False | 51 | 87.93 |
walk_slow | n | percent |
---|---|---|
True | 12 | 20.69 |
False | 46 | 79.31 |
time_hw | n | percent |
---|---|---|
True | 12 | 20.69 |
False | 46 | 79.31 |
walk_stair | n | percent |
---|---|---|
True | 14 | 24.14 |
False | 44 | 75.86 |
walk_fast | n | percent |
---|---|---|
True | 16 | 27.59 |
False | 42 | 72.41 |
breath_stairs | n | percent |
---|---|---|
True | 20 | 34.48 |
False | 38 | 65.52 |
breath_garden | n | percent |
---|---|---|
True | 33 | 56.9 |
False | 25 | 43.1 |
breath_manual | n | percent |
---|---|---|
True | 44 | 75.86 |
False | 14 | 24.14 |
sports | n | percent |
---|---|---|
True | 17 | 29.31 |
False | 41 | 70.69 |
recreation | n | percent |
---|---|---|
True | 13 | 22.41 |
False | 45 | 77.59 |
shopping | n | percent |
---|---|---|
True | 6 | 10.34 |
False | 52 | 89.66 |
housework | n | percent |
---|---|---|
True | 6 | 10.34 |
False | 52 | 89.66 |
chair | n | percent |
---|---|---|
True | 3 | 5.17 |
False | 55 | 94.83 |
finale | n | percent |
---|---|---|
Does not stop me | 27 | 46.55 |
Stops 1 or 2 things | 20 | 34.48 |
Stops most things | 3 | 5.17 |
Stops everything | 8 | 13.79 |