summary table

NSQIP Patient Characteristics, has gestational age, first shunt placement only
Characteristic VP Shunt Status
N = 7211
Sex
    Female 321 (45%)
    Male 400 (55%)
Birthweight (kg) 3.19 (2.55, 3.89)
Race
    American Indian or Alaska Native 3 (0.4%)
    Asian 11 (1.5%)
    Black or African American 182 (25%)
    Native Hawaiian or Other Pacific Islander 2 (0.3%)
    Unknown/Not Reported 132 (18%)
    White 391 (54%)
Hispanic Ethnicity 123 (19%)
    (Missing) 78
Gestational Age at Birth (weeks) 33.00 (26.00, 37.00)
Gestational Age at Surgery (weeks) 39.00 (37.00, 41.00)
Chronological Age at Surgery (days) 49.00 (11.00, 88.00)
PED_SHNT_1STPERM
    Yes 721 (100%)
PED_SHNT_HC_ETIOL
    Congenital hydrocephalus 273 (38%)
    Intracranial cyst 19 (2.6%)
    Intraventricular hemorrhage (IVH) of prematurity without other cause 287 (40%)
    Neoplastic 3 (0.4%)
    Other - insufficient information to classify type 12 (1.7%)
    Post hemorrhagic from other vascular lesion 31 (4.3%)
    Post infectious 22 (3.1%)
    Post traumatic 2 (0.3%)
    Spina bifida with Chiari malformation / myelomeningocele 66 (9.2%)
    Syndromic, not otherwise specified 2 (0.3%)
    Vascular anomaly 4 (0.6%)
PED_SHNT_INTRE1_RSN
    Distal catheter obstruction or malposition 5 (5.7%)
    Loculated ventricle 2 (2.3%)
    Mechanical issues 9 (10%)
    Other 9 (10%)
    Proximal catheter obstruction or malposition 9 (10%)
    Shunt infection 29 (33%)
    Wound disruption or CSF leak 24 (28%)
    (Missing) 634
under38weeks
    38plus 147 (20%)
    under38 574 (80%)
1 n (%); Median (IQR)
## [1] 54.62691

glm of 2019 data GLM

train and test sets of data

#No = 0, Yes = 1
dt = sort(sample(nrow(df_rem_NULL19), nrow(df_rem_NULL19)*.7))
train<-df_rem_NULL19[dt,-1]
test<-df_rem_NULL19[-dt,-1]

train$PED_SHNT_1STPERM <- ifelse(train$PED_SHNT_1STPERM=='No', 0, 1)
test$PED_SHNT_1STPERM <- ifelse(test$PED_SHNT_1STPERM=='No', 0, 1)

m1 <- glm(PED_SHNT_1STPERM ~ ., data = train, family = binomial(link='logit'), na.action="na.exclude") %>% stepAIC(trace = FALSE)

summary(m1)
## 
## Call:
## glm(formula = PED_SHNT_1STPERM ~ PED_SHNT_HC_ETIOL, family = binomial(link = "logit"), 
##     data = train, na.action = "na.exclude")
## 
## Deviance Residuals: 
##        Min          1Q      Median          3Q         Max  
## -2.409e-06   2.409e-06   2.409e-06   2.409e-06   2.409e-06  
## 
## Coefficients:
##                                                                                         Estimate
## (Intercept)                                                                            2.657e+01
## PED_SHNT_HC_ETIOLIntracranial cyst                                                     1.805e-07
## PED_SHNT_HC_ETIOLIntraventricular hemorrhage (IVH) of prematurity without other cause  1.927e-07
## PED_SHNT_HC_ETIOLNeoplastic                                                            2.474e-09
## PED_SHNT_HC_ETIOLNULL                                                                 -5.313e+01
## PED_SHNT_HC_ETIOLOther - insufficient information to classify type                    -4.629e-06
## PED_SHNT_HC_ETIOLPost hemorrhagic from other vascular lesion                          -4.629e-06
## PED_SHNT_HC_ETIOLPost infectious                                                      -4.629e-06
## PED_SHNT_HC_ETIOLPost traumatic                                                       -4.629e-06
## PED_SHNT_HC_ETIOLSpina bifida with Chiari malformation / myelomeningocele             -4.629e-06
## PED_SHNT_HC_ETIOLSyndromic, not otherwise specified                                   -4.629e-06
## PED_SHNT_HC_ETIOLVascular anomaly                                                     -4.629e-06
##                                                                                       Std. Error
## (Intercept)                                                                            1.829e+04
## PED_SHNT_HC_ETIOLIntracranial cyst                                                     4.476e+04
## PED_SHNT_HC_ETIOLIntraventricular hemorrhage (IVH) of prematurity without other cause  2.827e+04
## PED_SHNT_HC_ETIOLNeoplastic                                                            3.651e+04
## PED_SHNT_HC_ETIOLNULL                                                                  2.597e+04
## PED_SHNT_HC_ETIOLOther - insufficient information to classify type                     4.020e+04
## PED_SHNT_HC_ETIOLPost hemorrhagic from other vascular lesion                           6.291e+04
## PED_SHNT_HC_ETIOLPost infectious                                                       5.507e+04
## PED_SHNT_HC_ETIOLPost traumatic                                                        6.974e+04
## PED_SHNT_HC_ETIOLSpina bifida with Chiari malformation / myelomeningocele              4.452e+04
## PED_SHNT_HC_ETIOLSyndromic, not otherwise specified                                    7.094e+04
## PED_SHNT_HC_ETIOLVascular anomaly                                                      9.692e+04
##                                                                                       z value
## (Intercept)                                                                             0.001
## PED_SHNT_HC_ETIOLIntracranial cyst                                                      0.000
## PED_SHNT_HC_ETIOLIntraventricular hemorrhage (IVH) of prematurity without other cause   0.000
## PED_SHNT_HC_ETIOLNeoplastic                                                             0.000
## PED_SHNT_HC_ETIOLNULL                                                                  -0.002
## PED_SHNT_HC_ETIOLOther - insufficient information to classify type                      0.000
## PED_SHNT_HC_ETIOLPost hemorrhagic from other vascular lesion                            0.000
## PED_SHNT_HC_ETIOLPost infectious                                                        0.000
## PED_SHNT_HC_ETIOLPost traumatic                                                         0.000
## PED_SHNT_HC_ETIOLSpina bifida with Chiari malformation / myelomeningocele               0.000
## PED_SHNT_HC_ETIOLSyndromic, not otherwise specified                                     0.000
## PED_SHNT_HC_ETIOLVascular anomaly                                                       0.000
##                                                                                       Pr(>|z|)
## (Intercept)                                                                              0.999
## PED_SHNT_HC_ETIOLIntracranial cyst                                                       1.000
## PED_SHNT_HC_ETIOLIntraventricular hemorrhage (IVH) of prematurity without other cause    1.000
## PED_SHNT_HC_ETIOLNeoplastic                                                              1.000
## PED_SHNT_HC_ETIOLNULL                                                                    0.998
## PED_SHNT_HC_ETIOLOther - insufficient information to classify type                       1.000
## PED_SHNT_HC_ETIOLPost hemorrhagic from other vascular lesion                             1.000
## PED_SHNT_HC_ETIOLPost infectious                                                         1.000
## PED_SHNT_HC_ETIOLPost traumatic                                                          1.000
## PED_SHNT_HC_ETIOLSpina bifida with Chiari malformation / myelomeningocele                1.000
## PED_SHNT_HC_ETIOLSyndromic, not otherwise specified                                      1.000
## PED_SHNT_HC_ETIOLVascular anomaly                                                        1.000
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1.7134e+03  on 1554  degrees of freedom
## Residual deviance: 9.0215e-09  on 1543  degrees of freedom
## AIC: 24
## 
## Number of Fisher Scoring iterations: 25
plot(m1)

model_prob <- m1 %>% predict(test, type= "response")
predictions <- ifelse(model_prob > 0.5, "1","0")
mean(predictions == test$PED_SHNT_1STPERM) #77% accuracy
## [1] 1
sum(train$PED_SHNT_1STPERM);sum(test$PED_SHNT_1STPERM)
## [1] 1182
## [1] 510

redo logistic regression with continuously gestational age surgery

#No = 0, Yes = 1
dt = sort(sample(nrow(df_GA_surgery), nrow(df_GA_surgery)*.7))
train<-df_GA_surgery[dt,-1]
test<-df_GA_surgery[-dt,-1]

train$PED_SHNT_1STPERM <- ifelse(train$PED_SHNT_1STPERM=='No', 0, 1)
test$PED_SHNT_1STPERM <- ifelse(test$PED_SHNT_1STPERM=='No', 0, 1)

m1 <- glm(PED_SHNT_1STPERM ~ ., data = train, family = binomial(link='logit'), na.action="na.exclude") %>% stepAIC(trace = FALSE)

?stepAIC

summary(m1)
#plot(m1)
model_prob <- m1 %>% predict(test, type= "response")
predictions <- ifelse(model_prob > 0.5, "1","0")
mean(predictions == test$PED_SHNT_1STPERM) #91% accuracy

t.test for GA at birth and at surgery

t.test(df_GA_surgery$GESTATIONALAGE_BIRTH~df_GA_surgery$PED_SHNT_1STPERM)
t.test(df_GA_surgery$GESTATIONALAGE_SURGERY~df_GA_surgery$PED_SHNT_1STPERM)

stats with gestational age at surgery and at birth

ga <- df_GA_surgery[,c(10,11,13)]

x <- as.data.frame(table(ga[,2:3]))
ga_at_surgery <- x[1:25,]
ga_at_surgery$total <- x[26:50,3]+x[1:25,3]
names(ga_at_surgery)[names(ga_at_surgery) == 'Freq'] <- 'Failures'
ga_at_surgery$proportion_fail <- round(ga_at_surgery$Failures/ga_at_surgery$total,3)
ga_at_surgery <- ga_at_surgery[,-2]

x <- as.data.frame(table(ga[,-2]))
ga_at_birth <- x[1:20,]
ga_at_birth$total <- x[21:40,3]+x[1:20,3]
names(ga_at_birth )[names(ga_at_birth ) == 'Freq'] <- 'Failures'
ga_at_birth$proportion_fail <- round(ga_at_birth$Failures/ga_at_birth$total,3)
ga_at_birth <- ga_at_birth[,-2]
ggplot(data=as.data.frame(ga_at_birth), aes(x=GESTATIONALAGE_BIRTH, y=proportion_fail,fill=proportion_fail))+geom_bar(stat="identity") + 
  gghisto +theme(axis.text.x = element_text(size=10, angle=15)) + scale_y_continuous(labels = scales::percent)+scale_fill_gradient(low="mistyrose",high="lightcoral") +
  theme(legend.position="none") + xlab("Weeks Gestation at Birth") + ylab("Proportion reporting \nshunt revision") +ggtitle("Proportions of Premature Birth \nAges Reporting Shunt Revisions ")
ggplot(data=as.data.frame(ga_at_surgery), aes(x=GESTATIONALAGE_SURGERY, y=proportion_fail,fill=proportion_fail))+geom_bar(stat="identity") + 
  gghisto +theme(axis.text.x = element_text(size=10, angle=15)) + scale_y_continuous(labels = scales::percent)+scale_fill_gradient(low="mistyrose",high="lightcoral") +
  theme(legend.position="none") + xlab("Gestational Age at Surgery (weeks)") + ylab("Proportion reporting \nshunt revision") +ggtitle("Gestational Age by Proportional Shunt Failure")
elements <- list(
  geom_jitter(alpha = 0.7, size=7, shape=21, width = 0.01, height = 0.01),
          theme_classic(),
          gghisto)

ggplot(data = ga, aes(x = PED_SHNT_1STPERM, y = GESTATIONALAGE_BIRTH)) +
  geom_boxplot(outlier.colour="salmon", outlier.shape=16,
                outlier.size=4, width=.4, fill=c("indianred3","cornflowerblue"), alpha=0.3) + xlab(" ") + 
  ylab("Gestational Age at \nBirth (weeks)") + 
  scale_x_discrete(labels=c("No" = "Reported Shunt \nFailure", "Yes" = "No Reported \nFailure")) + gghisto +ggtitle("Gestational Age by \nShunt Failure")

random forest

#sex feamle = 1 male = 0
set.seed(222)
df3 <- df19[,c(2:4,9,11:15)]
df3$`Failure within first year after surgery` <- ifelse(df3$`Failure within first year after surgery`=='y', 1, 0)
df3$Sex <- ifelse(df3$Sex=='F', 1, 0)

names(df3) <- make.names(names(df3))
df3$Hydro.Etiology.Categorized <- as.factor(df3$Hydro.Etiology.Categorized)
dt = sort(sample(nrow(df3), nrow(df3)*.8))
train<-df3[dt,]
test<-df3[-dt,]

rf <- randomForest(as.factor(Failure.within.first.year.after.surgery)~., data=df3, proximity=TRUE, na.action = na.roughfix)

print(rf)

#looks bad 

below: closer examination of premature birth influence on shunt revision

df_premature0 <- df_rem_NULL19[,c(9:10)]

premature <- as.data.frame(table(df_premature0),stringsAsFactors=FALSE)
prem_no <- premature[1:9,]
prem_yes <- premature[10:18,]

df_premature <- prem_no
df_premature$first_shunt <- prem_yes$Freq
df_premature$multi_shunt <- prem_no$Freq
df_premature$total_cases <- df_premature$multi_shunt+df_premature$first_shunt
df_premature <- df_premature[,-c(2,3)]
df_premature$proportion_revised <- (df_premature$multi_shunt/df_premature$total_cases)

df_premature[8,1] <- "24 and fewer weeks gestation"

ggplot of premature birth versus shunt revisions 2019 data

ggplot(data=df_premature, aes(x=PREM_BIRTH, y=proportion_revised,fill=proportion_revised))+geom_bar(stat="identity")+
  scale_x_discrete(labels=c("<24 weeks","24 weeks","25-26 weeks","27-28 weeks","29-30 weeks","31-32 weeks","33-34 weeks","35-36 weeks","37+ weeks`")) + 
  gghisto +theme(axis.text.x = element_text(size=10, angle=15)) + scale_y_continuous(labels = scales::percent)+scale_fill_gradient(low="mistyrose",high="lightcoral") +
  theme(legend.position="none") + xlab("Weeks Gestation at Birth") + ylab("Proportion reporting \nshunt revision") +ggtitle("Proportions of Premature Birth \nCategories Reporting Shunt Revisions ")
prem_table <- table(df_rem_NULL19$PREM_BIRTH, df_rem_NULL19$PED_SHNT_1STPERM)
chisq.test(prem_table , correct=FALSE)

fisher.test(prem_table, simulate.p.value = TRUE)

library(chisq.posthoc.test)
chisq.posthoc.test(prem_table)

proportions test

case.vector <- df_premature$multi_shunt
names(case.vector) <- df_premature$PREM_BIRTH

total.vector <- df_premature$total_cases
names(total.vector) <- df_premature$PREM_BIRTH

prop.test(case.vector,total.vector)
prop.trend.test(case.vector,total.vector)

total shunt “failure”

sum(df_premature$multi_shunt)/sum(df_premature$total_cases)
sum(df_premature$first_shunt)

preliminary findings: mode of birth predicts needing a second shunt. days from admissions to operation are related to shunt revision. premature birth signficantly related to first or second shunt.

#summary of ALL shunts   
tbl_summary(df_GA_surgery[,c(2:6,8:13)],label = `PED_SHNT_1STPERM` ~ "First Shunt",by=`PED_SHNT_1STPERM`)%>%
  bold_labels()  %>%add_p()

#summary removing specified null values
tbl_summary(df_rem_NULL[,c(2:21)],label = `PED_SHNT_1STPERM` ~ "First Shunt",by=`PED_SHNT_1STPERM`)%>%
  bold_labels()  %>%add_p()

#summary of first time shunts, characterized by surgical adjuncts 
#tbl_summary(df_first[,c(2:5,7:16)], by=PED_SHNT_SURGADJS)%>%
  #bold_labels()  %>%add_p()