Background

Assessing association between patient and pleural fluid characteristics and overall survival.

Datasets Construction

I recoded the binary categorical variables so that 1 = Yes and 0 = No. For the m variable, 1 = Male and 0 = Female.

To view R code select the button to the right.

# Load the study data. The first row holds the column names, and text columns
# are kept as character rather than converted to factors.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
schimm <- read.csv("schimm.csv", header = TRUE, stringsAsFactors = FALSE)

# Recode a 0/1 indicator as a No/Yes factor.
#
# x: a vector coded with 0 and 1; it is converted with as.factor() before
#    recoding.
# Returns the result of recode_factor() with "0" relabelled "No" and "1"
# relabelled "Yes".
onezero <- function(x) {
  # Return the recoded factor directly: ending the body with an assignment
  # would hand the value back invisibly.
  recode_factor(as.factor(x), "0" = "No", "1" = "Yes")
}

# Relabel the coded columns as factors: m becomes Female/Male, and the
# 0/1 indicator columns become No/Yes via onezero().
schimm <- mutate(
  schimm,
  m = recode_factor(m, "0" = "Female", "1" = "Male"),
  pdl1 = onezero(pdl1),
  prior.xrt = onezero(prior.xrt),
  mets = onezero(mets),
  smoke = onezero(smoke)
)

# Event indicator for the survival model: every row is coded as an event (1),
# so no observation is treated as censored.
schimm[["dead"]] <- 1

# Cox proportional-hazards model of OS on all remaining columns of schimm.
schimm.cox <- coxph(
  Surv(OS, dead) ~ .,
  data = schimm
)

# Gaussian GLM (the glm() default family) of OS on every other covariate.
# `dead` is excluded: it is the constant 1 added for the Cox model, so it is
# aliased with the intercept and makes the fit rank-deficient, which triggers
# predict()'s "prediction from a rank-deficient fit may be misleading" warning.
# Dropping an aliased term leaves the fitted values unchanged.
schimm.glm <- glm(OS ~ . - dead, data = schimm)

# Predicted OS for every original row, via the predict() generic. Rows with a
# missing covariate get NA.
predicted.schimm <- predict(schimm.glm, newdata = schimm)
schimm$predicted <- predicted.schimm

Plots of Variables with Overall Survival

For scatterplots, I applied a Loess smoother to help the eye see any non-linear relationships. For categorical variables there are boxplots.

# Layers shared by every OS-vs-continuous-covariate scatterplot:
# raw points, a LOESS smoother, and the black-and-white theme.
os_scatter_layers <- list(
  geom_point(),
  geom_smooth(method = "loess"),
  theme_bw()
)

ggplot(schimm, aes(x = age, y = OS)) + os_scatter_layers

ggplot(schimm, aes(x = NLR, y = OS)) + os_scatter_layers

ggplot(schimm, aes(x = dNLR, y = OS)) + os_scatter_layers

ggplot(schimm, aes(x = s.albumin, y = OS)) + os_scatter_layers

ggplot(schimm, aes(x = p.glucose, y = OS)) + os_scatter_layers
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

ggplot(schimm, aes(x = p.TP, y = OS)) + os_scatter_layers

ggplot(schimm, aes(x = p.LDH, y = OS)) + os_scatter_layers

ggplot(schimm, aes(x = p.NLR, y = OS)) + os_scatter_layers

ggplot(schimm, aes(x = p.dNLR, y = OS)) + os_scatter_layers

# Layers shared by every OS-by-category boxplot.
os_box_layers <- list(
  geom_boxplot(),
  theme_bw()
)

ggplot(schimm, aes(x = m, y = OS)) + os_box_layers

ggplot(schimm, aes(x = smoke, y = OS)) + os_box_layers

ggplot(schimm, aes(x = mets, y = OS)) + os_box_layers

# prior.illness is not recoded earlier, so it is converted to a factor here.
ggplot(schimm, aes(x = as.factor(prior.illness), y = OS)) + os_box_layers

ggplot(schimm, aes(x = prior.xrt, y = OS)) + os_box_layers

ggplot(schimm, aes(x = pdl1, y = OS)) + os_box_layers

Plots with Predicted Overall Survival

For this I fitted a generalized linear model (glm() with its default Gaussian family) for overall survival, with all other variables as independent variables. Then I used the original data to predict overall survival from this model. Many predictions are missing (12 rows) because of missingness in pdl1.

# Layers shared by every predicted-OS-vs-continuous-covariate scatterplot:
# raw points, a LOESS smoother, and the black-and-white theme.
predicted_scatter_layers <- list(
  geom_point(),
  geom_smooth(method = "loess"),
  theme_bw()
)

ggplot(schimm, aes(x = age, y = predicted)) + predicted_scatter_layers
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).

ggplot(schimm, aes(x = NLR, y = predicted)) + predicted_scatter_layers
## Warning: Removed 12 rows containing non-finite values (stat_smooth).

## Warning: Removed 12 rows containing missing values (geom_point).

ggplot(schimm, aes(x = dNLR, y = predicted)) + predicted_scatter_layers
## Warning: Removed 12 rows containing non-finite values (stat_smooth).

## Warning: Removed 12 rows containing missing values (geom_point).

ggplot(schimm, aes(x = s.albumin, y = predicted)) + predicted_scatter_layers
## Warning: Removed 12 rows containing non-finite values (stat_smooth).

## Warning: Removed 12 rows containing missing values (geom_point).

ggplot(schimm, aes(x = p.glucose, y = predicted)) + predicted_scatter_layers
## Warning: Removed 12 rows containing non-finite values (stat_smooth).

## Warning: Removed 12 rows containing missing values (geom_point).

ggplot(schimm, aes(x = p.TP, y = predicted)) + predicted_scatter_layers
## Warning: Removed 12 rows containing non-finite values (stat_smooth).

## Warning: Removed 12 rows containing missing values (geom_point).

ggplot(schimm, aes(x = p.LDH, y = predicted)) + predicted_scatter_layers
## Warning: Removed 12 rows containing non-finite values (stat_smooth).

## Warning: Removed 12 rows containing missing values (geom_point).

ggplot(schimm, aes(x = p.NLR, y = predicted)) + predicted_scatter_layers
## Warning: Removed 12 rows containing non-finite values (stat_smooth).

## Warning: Removed 12 rows containing missing values (geom_point).

ggplot(schimm, aes(x = p.dNLR, y = predicted)) + predicted_scatter_layers
## Warning: Removed 12 rows containing non-finite values (stat_smooth).

## Warning: Removed 12 rows containing missing values (geom_point).

# Layers shared by every predicted-OS-by-category boxplot.
predicted_box_layers <- list(
  geom_boxplot(),
  theme_bw()
)

ggplot(schimm, aes(x = m, y = predicted)) + predicted_box_layers
## Warning: Removed 12 rows containing non-finite values (stat_boxplot).

ggplot(schimm, aes(x = smoke, y = predicted)) + predicted_box_layers
## Warning: Removed 12 rows containing non-finite values (stat_boxplot).

ggplot(schimm, aes(x = mets, y = predicted)) + predicted_box_layers
## Warning: Removed 12 rows containing non-finite values (stat_boxplot).

# prior.illness is not recoded earlier, so it is converted to a factor here.
ggplot(schimm, aes(x = as.factor(prior.illness), y = predicted)) +
  predicted_box_layers
## Warning: Removed 12 rows containing non-finite values (stat_boxplot).

ggplot(schimm, aes(x = prior.xrt, y = predicted)) + predicted_box_layers
## Warning: Removed 12 rows containing non-finite values (stat_boxplot).

ggplot(schimm, aes(x = pdl1, y = predicted)) + predicted_box_layers
## Warning: Removed 12 rows containing non-finite values (stat_boxplot).

Survival PH curve

I fitted a Cox PH model for overall survival time (assuming everyone died?) and then generated an adjusted survival curve for the whole cohort, which by default uses the mean values of all covariates.

# Survival curve derived from the fitted Cox model. survfit() is called
# without newdata, so a single curve is produced for the whole cohort.
cohort_curve <- survfit(schimm.cox)
ggsurvplot(cohort_curve, data = schimm)