library(dplyr)
library(ggplot2)
library(knitr)
# Turn a 0/1 indicator into a factor labelled "Yes" (code 1) / "No" (code 0).
#
# Args:
#   x: Vector of 0/1 codes (any type that as.factor() accepts).
#
# Returns:
#   A factor; recode_factor() orders the levels by the replacements given
#   here, so "Yes" comes before "No". The value is returned visibly (the
#   previous version ended in an assignment, so interactive calls printed
#   nothing).
onezero <- function(x) {
  recode_factor(as.factor(x), "1" = "Yes", "0" = "No")
}
# Turn a 0/1 questionnaire answer into a factor labelled "True" / "False".
#
# Note the coding: 0 is labelled "True" and 1 is labelled "False", i.e. the
# reverse of onezero().
# NOTE(review): presumably this mirrors how the survey export codes
# True/False items -- confirm against the data dictionary.
#
# Args:
#   x: Vector of 0/1 codes (any type that as.factor() accepts).
#
# Returns:
#   A factor; recode_factor() orders the levels by the replacements given
#   here, so "True" comes before "False". The value is returned visibly (the
#   previous version ended in an assignment, so interactive calls printed
#   nothing).
truefalse <- function(x) {
  recode_factor(as.factor(x), "0" = "True", "1" = "False")
}
# Print a plot and return a markdown summary table for a single variable.
#
# Args:
#   var: Vector to describe.
#        * numeric / integer: table of mean, median, SD and missing-value
#          count / percent, plus a histogram with mean and median lines.
#        * factor: table of counts and percentages per level (NA gets its
#          own row), plus a bar chart of the non-missing values.
#        * character / logical: converted to factor and handled as above.
#   ...: The first argument is used as the x-axis label and, for factors, as
#        the name of the category column in the table. When omitted, the
#        expression supplied as `var` is used instead.
#
# Returns:
#   The knitr::kable() table in markdown format. The plot is printed as a
#   side effect.
#
# The summary is computed from `var` itself, so the function no longer needs
# a global `tb0`; percentages use length(var) as the denominator.
univariate <- function(var, ...) {
  dots <- list(...)
  # Resolve the label before `var` is touched so substitute() still sees the
  # caller's expression.
  label <- if (length(dots) > 0) {
    dots[[1]]
  } else {
    deparse(substitute(var))[1]
  }

  if (is.character(var) || is.logical(var)) {
    var <- factor(var)
  }

  # Frame and tilted x-axis labels shared by both plot types.
  frame_theme <- theme(
    plot.background = element_rect(fill = "white", color = "black", size = 1),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )

  if (is.numeric(var)) {
    n_missing <- sum(is.na(var))
    a <- data.frame(
      Mean = mean(var, na.rm = TRUE),
      Median = median(var, na.rm = TRUE),
      SD = sd(var, na.rm = TRUE),
      nMissing = n_missing,
      pMissing = round(100 * n_missing / length(var), digits = 2)
    )
    b <- ggplot() +
      geom_histogram(aes(var), col = "black", fill = "light blue") +
      # Mapping a constant string to colour puts both lines in the legend.
      geom_vline(
        aes(xintercept = mean(var, na.rm = TRUE), colour = "Mean"),
        size = 1
      ) +
      geom_vline(
        aes(xintercept = median(var, na.rm = TRUE), colour = "Median"),
        size = 1
      ) +
      xlab(label) +
      theme_minimal() +
      frame_theme
  } else if (is.factor(var)) {
    a <- data.frame(value = var) %>%
      group_by(value) %>%
      summarise(n = n()) %>%
      mutate(percent = round(100 * n / sum(n), digits = 2))
    # The category column carries the caller's label, as before.
    names(a)[1] <- as.character(label)
    b <- ggplot() +
      geom_bar(aes(na.omit(var)), col = "black", fill = "light blue") +
      xlab(label) +
      ylab("Count") +
      theme_minimal() +
      frame_theme
  } else {
    # Previously this fell through and failed with "object 'b' not found".
    stop(
      "univariate() supports numeric, factor, character and logical ",
      "vectors; got class ", paste(class(var), collapse = "/"),
      call. = FALSE
    )
  }

  print(b)
  # kable() selects the output type via `format`; `type` is not an argument.
  kable(a, format = "markdown")
}
# Load the two CSV exports from the project data directory. In both files a
# lone "." is read as NA and text columns stay character.
# NOTE(review): the working directory is a hard-coded absolute path, so the
# script only runs on a machine with this folder layout.
setwd("C:/Users/jkempke/Box Sync/PFT_St_George_Project/data")
tb0 <- read.csv(
  "stgeorge_data.csv",
  na.strings = ".",
  stringsAsFactors = FALSE
)
st.george <- read.csv(
  "SGRQ scores.csv",
  na.strings = ".",
  stringsAsFactors = FALSE
)
# Lower-case the score-file column names.
names(st.george) <- tolower(names(st.george))
# Rebuild tb0 with typed, labelled columns. transmute() keeps only the
# columns named here, in this order:
#   * 0/1 indicators        -> "Yes"/"No" factors via onezero()
#   * True/False items      -> "True"/"False" factors via truefalse()
#   * coded categorical items -> labelled factors via recode_factor()
#   * date strings (month/day/4-digit year) -> Date
#   * everything else is carried over unchanged
tb0 <- tb0 %>%
  transmute(
    # --- Identifiers and demographics -------------------------------------
    Obs = Obs,
    study_id = as.character(study_id),
    age = age,
    sex = recode_factor(sex, "0" = "Female", "1" = "Male"),

    # --- Risk factors and comorbidities -----------------------------------
    tob = onezero(tob),
    etoh = onezero(etoh),
    ivdu = onezero(ivdu),
    diabetes = onezero(comorbid___0),
    liver = onezero(comorbid___4),
    hep_b = onezero(comorbid___6),

    # --- Diagnosis, disease extent and drug indicators --------------------
    tbdiag = as.Date(tbdiag, format = "%m/%d/%Y"),
    txstart = as.Date(txstart, format = "%m/%d/%Y"),
    # Converted to factor first and then relabelled, as in the original
    # two-step definition; kept at this position so column order is the same.
    case_def = recode_factor(
      as.factor(case_def),
      "0" = "New",
      "1" = "PrevTx 1st Line",
      "2" = "PrevTx 2nd Line"
    ),
    multi = onezero(multi),
    bilat = onezero(bilat),
    cavit = onezero(cavit),
    bicavit = onezero(bicavit),
    cav_pres_surg = onezero(cav_pres_surg),
    strep = onezero(strep),
    inh = onezero(inh),
    rif = onezero(rif),
    etham = onezero(etham),
    ethion = onezero(ethion),
    ofl = onezero(ofl),
    pas = onezero(pas),
    cap = onezero(cap),
    kan = onezero(kan),
    mdr = onezero(mdr),
    xdr = onezero(xdr),
    date_cult_conv = as.Date(date_cult_conv, format = "%m/%d/%Y"),
    indication = as.factor(indication),

    # --- Measurements, surgery and outcome --------------------------------
    vital = vital_,
    fev = fev,
    height = height,
    weight = weight,
    bmi = bmi,
    mdr_date = as.Date(mdr_date, format = "%m/%d/%Y"),
    surgtype = recode_factor(
      surgtype,
      "0" = "Pneumonectomy",
      "1" = "Lobectomy",
      "2" = "Segmentectomy",
      "3" = "Wedge",
      "4" = "Other"
    ),
    majorcom = recode_factor(
      majorcom,
      "0" = "BP fistula",
      "1" = "Empyema",
      "2" = "PE",
      "3" = "Hemorrhage",
      "4" = "Pneumonia",
      "5" = "Respiratory Insufficiency"
    ),
    outcome = recode_factor(
      outcome,
      "0" = "Failure",
      "1" = "Cured",
      "2" = "Completed",
      "3" = "Default",
      "4" = "Died",
      "5" = "Transfered Out"
    ),
    date_outcome = as.Date(date_outcome, format = "%m/%d/%Y"),

    # --- Pulmonary function test values -----------------------------------
    pft_date = as.Date(pft_date, format = "%m/%d/%Y"),
    pft_weight = pft_weight,
    fvc = fvc,
    fvc_per = fvc_per,
    fev1 = fev1,
    fev1_per = fev1_per,
    ratio = ratio,
    pef = pef,
    ratio_2 = ratio_2,

    # --- Questionnaire: multi-level items ---------------------------------
    # NOTE(review): the "5" = "Transfer Out" label resembles the last outcome
    # label above -- confirm it really belongs to the health item.
    health = recode_factor(
      health,
      "0" = "Very Good",
      "1" = "Good",
      "2" = "Fair",
      "3" = "Poor",
      "4" = "Very Poor",
      "5" = "Transfer Out"
    ),
    cough = recode_factor(
      cough,
      "0" = "Most Days/Week",
      "1" = "Several Days/Week",
      "2" = "Few Days/Month",
      "3" = "Only w/ Chest Infections",
      "4" = "None"
    ),
    phlegm = recode_factor(
      phlegm,
      "0" = "Most Days/Week",
      "1" = "Several Days/Week",
      "2" = "Few Days/Month",
      "3" = "Only w/ Chest Infections",
      "4" = "None"
    ),
    shortness = recode_factor(
      shortness,
      "0" = "Most Days/Week",
      "1" = "Several Days/Week",
      "2" = "Few Days/Month",
      "3" = "Only w/ Chest Infections",
      "4" = "None"
    ),
    wheezing = recode_factor(
      wheezing,
      "0" = "Most Days/Week",
      "1" = "Several Days/Week",
      "2" = "Few Days/Month",
      "3" = "Only w/ Chest Infections",
      "4" = "None"
    ),
    # Every answer code needs its own key: the earlier version repeated "1"
    # and "0", so two labels competed for the same code and codes 3 and 4 had
    # no label. The codes are assumed to run 0-4 in the order of the labels,
    # like the other items -- TODO confirm against the data dictionary.
    attacks = recode_factor(
      attacks,
      "0" = ">3",
      "1" = "3",
      "2" = "2",
      "3" = "1",
      "4" = "None"
    ),
    # Same fix: the last key was a second "0"; assumed to be code 3 --
    # TODO confirm against the data dictionary.
    attack_length = recode_factor(
      attack_length,
      "0" = ">/= Week",
      "1" = ">/=3days",
      "2" = "1 or 2 days",
      "3" = "<1 day"
    ),
    good_day = recode_factor(
      good_day,
      "0" = "None",
      "1" = "1 or 2",
      "2" = "3 or 4",
      "3" = "Nearly Every day",
      "4" = "Every day"
    ),
    wheeze = onezero(wheeze),
    chest_con = recode_factor(
      chest_con,
      "0" = "Most Important",
      "1" = "Causes quite a lot",
      "2" = "Causes a few",
      "3" = "No Problem"
    ),
    paid_employ = recode_factor(
      paid_employ,
      "0" = "Stop Work",
      "1" = "Interferes or Changed Workd",
      "2" = "Doesn't affect Work"
    ),

    # --- Questionnaire: True/False items ----------------------------------
    sitting = truefalse(sitting),
    washed = truefalse(washed),
    walking = truefalse(walking),
    walk_outside = truefalse(walk_outside),
    walk_stairs = truefalse(walk_stairs),
    walk_hill = truefalse(walk_hill),
    games = truefalse(games),
    cough_hurt = truefalse(cough_hurt),
    cough_tired = truefalse(cough_tired),
    breath_talk = truefalse(breath_talk),
    breath_bend = truefalse(breath_bend),
    breath_sleep = truefalse(breath_sleep),
    exhausted = truefalse(exhausted),
    embarrass = truefalse(embarrass),
    nuisance = truefalse(nuisance),
    panic = truefalse(panic),
    control_chest = truefalse(control_chest),
    expect_chest = truefalse(expect_chest),
    frail = truefalse(frail),
    exercise_safe = truefalse(exercise_safe),
    much_effort = truefalse(much_effort),
    med_help = truefalse(med_help),
    med_embar = truefalse(med_embar),
    med_se = truefalse(med_se),
    med_inter = truefalse(med_inter),
    time_wash = truefalse(time_wash),
    time_bath = truefalse(time_bath),
    walk_slow = truefalse(walk_slow),
    time_hw = truefalse(time_hw),
    walk_stair = truefalse(walk_stair),
    walk_fast = truefalse(walk_fast),
    breath_stairs = truefalse(breath_stairs),
    breath_garden = truefalse(breath_garden),
    breath_manual = truefalse(breath_manual),
    sports = truefalse(sports),
    recreation = truefalse(recreation),
    shopping = truefalse(shopping),
    housework = truefalse(housework),
    chair = truefalse(chair),
    finale = recode_factor(
      finale,
      "0" = "Does not stop me",
      "1" = "Stops 1 or 2 things",
      "2" = "Stops most things",
      "3" = "Stops everything"
    )
  )
# Drop score rows without an id, then replace `id` with the character key
# `study_id` so the scores can be matched to tb0.
st.george <- st.george %>%
  filter(!is.na(id)) %>%
  mutate(study_id = as.character(id)) %>%
  dplyr::select(-id)

# Left join: every tb0 row is kept; score columns are NA where no match exists.
tb1 <- left_join(tb0, st.george, by = "study_id")
| Mean | Median | SD | nMissing | pMissing |
|---|---|---|---|---|
| 33.73907 | 31.1697 | 10.80798 | 0 | 0 |
| sex | n | percent |
|---|---|---|
| Female | 25 | 43.1 |
| Male | 33 | 56.9 |
| tob | n | percent |
|---|---|---|
| Yes | 23 | 39.66 |
| No | 35 | 60.34 |
| etoh | n | percent |
|---|---|---|
| Yes | 11 | 18.97 |
| No | 47 | 81.03 |
| ivdu | n | percent |
|---|---|---|
| No | 58 | 100 |
| diabetes | n | percent |
|---|---|---|
| Yes | 5 | 8.62 |
| No | 53 | 91.38 |
| liver | n | percent |
|---|---|---|
| Yes | 4 | 6.9 |
| No | 54 | 93.1 |
| hep_b | n | percent |
|---|---|---|
| Yes | 1 | 1.72 |
| No | 57 | 98.28 |
## Warning: `geom_bar()` no longer has a `binwidth` parameter. Please use
## `geom_histogram()` instead.
## Warning: `geom_bar()` no longer has a `binwidth` parameter. Please use
## `geom_histogram()` instead.
| case_def | n | percent |
|---|---|---|
| New | 24 | 41.38 |
| PrevTx 1st Line | 31 | 53.45 |
| PrevTx 2nd Line | 3 | 5.17 |
| multi | n | percent |
|---|---|---|
| Yes | 36 | 62.07 |
| No | 22 | 37.93 |
| bilat | n | percent |
|---|---|---|
| Yes | 25 | 43.1 |
| No | 33 | 56.9 |
| cavit | n | percent |
|---|---|---|
| Yes | 58 | 100 |
| bicavit | n | percent |
|---|---|---|
| Yes | 13 | 22.41 |
| No | 45 | 77.59 |
| cav_pres_surg | n | percent |
|---|---|---|
| Yes | 45 | 77.59 |
| No | 13 | 22.41 |
| strep | n | percent |
|---|---|---|
| Yes | 55 | 94.83 |
| No | 3 | 5.17 |
| inh | n | percent |
|---|---|---|
| Yes | 57 | 98.28 |
| No | 1 | 1.72 |
| rif | n | percent |
|---|---|---|
| Yes | 58 | 100 |
| etham | n | percent |
|---|---|---|
| Yes | 42 | 72.41 |
| No | 16 | 27.59 |
| ethion | n | percent |
|---|---|---|
| Yes | 40 | 68.97 |
| No | 15 | 25.86 |
| NA | 3 | 5.17 |
| ofl | n | percent |
|---|---|---|
| Yes | 15 | 25.86 |
| No | 41 | 70.69 |
| NA | 2 | 3.45 |
| pas | n | percent |
|---|---|---|
| Yes | 7 | 12.07 |
| No | 49 | 84.48 |
| NA | 2 | 3.45 |
| cap | n | percent |
|---|---|---|
| Yes | 12 | 20.69 |
| No | 42 | 72.41 |
| NA | 4 | 6.90 |
| kan | n | percent |
|---|---|---|
| Yes | 19 | 32.76 |
| No | 37 | 63.79 |
| NA | 2 | 3.45 |
| mdr | n | percent |
|---|---|---|
| Yes | 54 | 93.1 |
| No | 4 | 6.9 |
| xdr | n | percent |
|---|---|---|
| Yes | 9 | 15.52 |
| No | 49 | 84.48 |
## Warning: `geom_bar()` no longer has a `binwidth` parameter. Please use
## `geom_histogram()` instead.
## Warning: Removed 1 rows containing non-finite values (stat_bin).
## Warning: `geom_bar()` no longer has a `binwidth` parameter. Please use
## `geom_histogram()` instead.
| surgtype | n | percent |
|---|---|---|
| Lobectomy | 10 | 17.24 |
| Segmentectomy | 15 | 25.86 |
| Other | 2 | 3.45 |
| NA | 31 | 53.45 |
| majorcom | n | percent |
|---|---|---|
| BP fistula | 1 | 1.72 |
| Hemorrhage | 1 | 1.72 |
| NA | 56 | 96.55 |
| outcome | n | percent |
|---|---|---|
| Cured | 41 | 70.69 |
| Completed | 15 | 25.86 |
| Default | 2 | 3.45 |
## Warning: `geom_bar()` no longer has a `binwidth` parameter. Please use
## `geom_histogram()` instead.
## Warning: Removed 31 rows containing non-finite values (stat_bin).
| Mean | Median | SD | nMissing | pMissing |
|---|---|---|---|---|
| 3.296296 | 3 | 0.7753319 | 31 | 53.45 |
## Warning: Removed 31 rows containing non-finite values (stat_bin).
| Mean | Median | SD | nMissing | pMissing |
|---|---|---|---|---|
| 2.740741 | 3 | 0.8129998 | 31 | 53.45 |
| Mean | Median | SD | nMissing | pMissing |
|---|---|---|---|---|
| 1.721897 | 1.7 | 0.091622 | 0 | 0 |
## Warning: Removed 1 rows containing non-finite values (stat_bin).
| Mean | Median | SD | nMissing | pMissing |
|---|---|---|---|---|
| 65.40351 | 62 | 13.40157 | 1 | 1.72 |
## Warning: Removed 1 rows containing non-finite values (stat_bin).
| Mean | Median | SD | nMissing | pMissing |
|---|---|---|---|---|
| 22.01819 | 21.22449 | 3.766585 | 1 | 1.72 |
## Warning: `geom_bar()` no longer has a `binwidth` parameter. Please use
## `geom_histogram()` instead.
| Mean | Median | SD | nMissing | pMissing |
|---|---|---|---|---|
| 71.55172 | 71.5 | 13.47405 | 0 | 0 |
## Warning: Removed 1 rows containing non-finite values (stat_bin).
| Mean | Median | SD | nMissing | pMissing |
|---|---|---|---|---|
| 3795.789 | 3530 | 1208.966 | 1 | 1.72 |
| Mean | Median | SD | nMissing | pMissing |
|---|---|---|---|---|
| 89.34483 | 91.5 | 22.21944 | 0 | 0 |
| Mean | Median | SD | nMissing | pMissing |
|---|---|---|---|---|
| 2760.862 | 2600 | 1171.599 | 0 | 0 |
| Mean | Median | SD | nMissing | pMissing |
|---|---|---|---|---|
| 77.05172 | 80 | 26.74403 | 0 | 0 |
| Mean | Median | SD | nMissing | pMissing |
|---|---|---|---|---|
| 70.77586 | 71.5 | 12.35732 | 0 | 0 |
| Mean | Median | SD | nMissing | pMissing |
|---|---|---|---|---|
| 71.82759 | 74.5 | 29.67611 | 0 | 0 |
| Mean | Median | SD | nMissing | pMissing |
|---|---|---|---|---|
| 57.60345 | 44.5 | 35.08133 | 0 | 0 |
| Mean | Median | SD | nMissing | pMissing |
|---|---|---|---|---|
| 35.77155 | 28.575 | 30.79718 | 0 | 0 |
| Mean | Median | SD | nMissing | pMissing |
|---|---|---|---|---|
| 29.42897 | 23.28 | 31.25435 | 0 | 0 |
| Mean | Median | SD | nMissing | pMissing |
|---|---|---|---|---|
| 15.69362 | 3.725 | 26.12808 | 0 | 0 |
| Mean | Median | SD | nMissing | pMissing |
|---|---|---|---|---|
| 23.13552 | 12.245 | 27.09139 | 0 | 0 |
| health | n | percent |
|---|---|---|
| Very Good | 9 | 15.52 |
| Good | 23 | 39.66 |
| Fair | 17 | 29.31 |
| Poor | 6 | 10.34 |
| Very Poor | 3 | 5.17 |
| cough | n | percent |
|---|---|---|
| Most Days/Week | 14 | 24.14 |
| Several Days/Week | 7 | 12.07 |
| Few Days/Month | 4 | 6.90 |
| Only w/ Chest Infections | 16 | 27.59 |
| None | 17 | 29.31 |
| phlegm | n | percent |
|---|---|---|
| Most Days/Week | 13 | 22.41 |
| Several Days/Week | 7 | 12.07 |
| Few Days/Month | 2 | 3.45 |
| Only w/ Chest Infections | 13 | 22.41 |
| None | 23 | 39.66 |
| shortness | n | percent |
|---|---|---|
| Most Days/Week | 9 | 15.52 |
| Several Days/Week | 6 | 10.34 |
| Few Days/Month | 6 | 10.34 |
| Only w/ Chest Infections | 2 | 3.45 |
| None | 35 | 60.34 |
| wheezing | n | percent |
|---|---|---|
| Most Days/Week | 5 | 8.62 |
| Several Days/Week | 5 | 8.62 |
| Few Days/Month | 5 | 8.62 |
| Only w/ Chest Infections | 8 | 13.79 |
| None | 34 | 58.62 |
| NA | 1 | 1.72 |
| attacks | n | percent |
|---|---|---|
| 2 | 2 | 3.45 |
| 1 | 1 | 1.72 |
| None | 6 | 10.34 |
| NA | 49 | 84.48 |
| attack_length | n | percent |
|---|---|---|
| >/=3days | 6 | 10.34 |
| 1 or 2 days | 1 | 1.72 |
| <1 day | 23 | 39.66 |
| NA | 28 | 48.28 |
| good_day | n | percent |
|---|---|---|
| None | 7 | 12.07 |
| 1 or 2 | 4 | 6.90 |
| 3 or 4 | 6 | 10.34 |
| Nearly Every day | 23 | 39.66 |
| Every day | 18 | 31.03 |
| wheeze | n | percent |
|---|---|---|
| Yes | 4 | 6.90 |
| No | 11 | 18.97 |
| NA | 43 | 74.14 |
| chest_con | n | percent |
|---|---|---|
| Most Important | 8 | 13.79 |
| Causes quite a lot | 3 | 5.17 |
| Causes a few | 21 | 36.21 |
| No Problem | 26 | 44.83 |
| paid_employ | n | percent |
|---|---|---|
| Stop Work | 9 | 15.52 |
| Interferes or Changed Workd | 6 | 10.34 |
| Doesn’t affect Work | 43 | 74.14 |
| sitting | n | percent |
|---|---|---|
| True | 4 | 6.9 |
| False | 54 | 93.1 |
| washed | n | percent |
|---|---|---|
| True | 10 | 17.24 |
| False | 48 | 82.76 |
| walking | n | percent |
|---|---|---|
| True | 9 | 15.52 |
| False | 49 | 84.48 |
| walk_outside | n | percent |
|---|---|---|
| True | 11 | 18.97 |
| False | 47 | 81.03 |
| walk_stairs | n | percent |
|---|---|---|
| True | 30 | 51.72 |
| False | 28 | 48.28 |
| walk_hill | n | percent |
|---|---|---|
| True | 21 | 36.21 |
| False | 37 | 63.79 |
| games | n | percent |
|---|---|---|
| True | 36 | 62.07 |
| False | 22 | 37.93 |
| cough_hurt | n | percent |
|---|---|---|
| True | 10 | 17.24 |
| False | 48 | 82.76 |
| cough_tired | n | percent |
|---|---|---|
| True | 8 | 13.79 |
| False | 50 | 86.21 |
| breath_talk | n | percent |
|---|---|---|
| True | 9 | 15.52 |
| False | 49 | 84.48 |
| breath_bend | n | percent |
|---|---|---|
| True | 7 | 12.07 |
| False | 51 | 87.93 |
| breath_sleep | n | percent |
|---|---|---|
| True | 9 | 15.52 |
| False | 49 | 84.48 |
| exhausted | n | percent |
|---|---|---|
| True | 16 | 27.59 |
| False | 42 | 72.41 |
| embarrass | n | percent |
|---|---|---|
| True | 9 | 15.52 |
| False | 49 | 84.48 |
| nuisance | n | percent |
|---|---|---|
| True | 6 | 10.34 |
| False | 52 | 89.66 |
| panic | n | percent |
|---|---|---|
| True | 8 | 13.79 |
| False | 50 | 86.21 |
| control_chest | n | percent |
|---|---|---|
| True | 9 | 15.52 |
| False | 49 | 84.48 |
| expect_chest | n | percent |
|---|---|---|
| True | 10 | 17.24 |
| False | 48 | 82.76 |
| frail | n | percent |
|---|---|---|
| True | 10 | 17.24 |
| False | 48 | 82.76 |
| exercise_safe | n | percent |
|---|---|---|
| True | 17 | 29.31 |
| False | 41 | 70.69 |
| much_effort | n | percent |
|---|---|---|
| True | 11 | 18.97 |
| False | 47 | 81.03 |
| med_help | n | percent |
|---|---|---|
| True | 3 | 5.17 |
| False | 3 | 5.17 |
| NA | 52 | 89.66 |
| med_embar | n | percent |
|---|---|---|
| True | 1 | 1.72 |
| False | 5 | 8.62 |
| NA | 52 | 89.66 |
| med_se | n | percent |
|---|---|---|
| True | 1 | 1.72 |
| False | 5 | 8.62 |
| NA | 52 | 89.66 |
| med_inter | n | percent |
|---|---|---|
| True | 1 | 1.72 |
| False | 5 | 8.62 |
| NA | 52 | 89.66 |
| time_wash | n | percent |
|---|---|---|
| True | 6 | 10.34 |
| False | 52 | 89.66 |
| time_bath | n | percent |
|---|---|---|
| True | 7 | 12.07 |
| False | 51 | 87.93 |
| walk_slow | n | percent |
|---|---|---|
| True | 12 | 20.69 |
| False | 46 | 79.31 |
| time_hw | n | percent |
|---|---|---|
| True | 12 | 20.69 |
| False | 46 | 79.31 |
| walk_stair | n | percent |
|---|---|---|
| True | 14 | 24.14 |
| False | 44 | 75.86 |
| walk_fast | n | percent |
|---|---|---|
| True | 16 | 27.59 |
| False | 42 | 72.41 |
| breath_stairs | n | percent |
|---|---|---|
| True | 20 | 34.48 |
| False | 38 | 65.52 |
| breath_garden | n | percent |
|---|---|---|
| True | 33 | 56.9 |
| False | 25 | 43.1 |
| breath_manual | n | percent |
|---|---|---|
| True | 44 | 75.86 |
| False | 14 | 24.14 |
| sports | n | percent |
|---|---|---|
| True | 17 | 29.31 |
| False | 41 | 70.69 |
| recreation | n | percent |
|---|---|---|
| True | 13 | 22.41 |
| False | 45 | 77.59 |
| shopping | n | percent |
|---|---|---|
| True | 6 | 10.34 |
| False | 52 | 89.66 |
| housework | n | percent |
|---|---|---|
| True | 6 | 10.34 |
| False | 52 | 89.66 |
| chair | n | percent |
|---|---|---|
| True | 3 | 5.17 |
| False | 55 | 94.83 |
| finale | n | percent |
|---|---|---|
| Does not stop me | 27 | 46.55 |
| Stops 1 or 2 things | 20 | 34.48 |
| Stops most things | 3 | 5.17 |
| Stops everything | 8 | 13.79 |