summary table
| Characteristic | VP Shunt Status |
|---|---|
| N = 7211 | |
| Sex | |
| Female | 321 (45%) |
| Male | 400 (55%) |
| Birthweight (kg) | 3.19 (2.55, 3.89) |
| Race | |
| American Indian or Alaska Native | 3 (0.4%) |
| Asian | 11 (1.5%) |
| Black or African American | 182 (25%) |
| Native Hawaiian or Other Pacific Islander | 2 (0.3%) |
| Unknown/Not Reported | 132 (18%) |
| White | 391 (54%) |
| Hispanic Ethnicity | 123 (19%) |
| (Missing) | 78 |
| Gestational Age at Birth (weeks) | 33.00 (26.00, 37.00) |
| Gestational Age at Surgery (weeks) | 39.00 (37.00, 41.00) |
| Chronological Age at Surgery (days) | 49.00 (11.00, 88.00) |
| PED_SHNT_1STPERM | |
| Yes | 721 (100%) |
| PED_SHNT_HC_ETIOL | |
| Congenital hydrocephalus | 273 (38%) |
| Intracranial cyst | 19 (2.6%) |
| Intraventricular hemorrhage (IVH) of prematurity without other cause | 287 (40%) |
| Neoplastic | 3 (0.4%) |
| Other - insufficient information to classify type | 12 (1.7%) |
| Post hemorrhagic from other vascular lesion | 31 (4.3%) |
| Post infectious | 22 (3.1%) |
| Post traumatic | 2 (0.3%) |
| Spina bifida with Chiari malformation / myelomeningocele | 66 (9.2%) |
| Syndromic, not otherwise specified | 2 (0.3%) |
| Vascular anomaly | 4 (0.6%) |
| PED_SHNT_INTRE1_RSN | |
| Distal catheter obstruction or malposition | 5 (5.7%) |
| Loculated ventricle | 2 (2.3%) |
| Mechanical issues | 9 (10%) |
| Other | 9 (10%) |
| Proximal catheter obstruction or malposition | 9 (10%) |
| Shunt infection | 29 (33%) |
| Wound disruption or CSF leak | 24 (28%) |
| (Missing) | 634 |
| under38weeks | |
| 38plus | 147 (20%) |
| under38 | 574 (80%) |
| 1 n (%); Median (IQR) | |
## [1] 54.62691
glm of 2019 data GLM
train and test sets of data
#No = 0, Yes = 1
dt = sort(sample(nrow(df_rem_NULL19), nrow(df_rem_NULL19)*.7))
train<-df_rem_NULL19[dt,-1]
test<-df_rem_NULL19[-dt,-1]
train$PED_SHNT_1STPERM <- ifelse(train$PED_SHNT_1STPERM=='No', 0, 1)
test$PED_SHNT_1STPERM <- ifelse(test$PED_SHNT_1STPERM=='No', 0, 1)
m1 <- glm(PED_SHNT_1STPERM ~ ., data = train, family = binomial(link='logit'), na.action="na.exclude") %>% stepAIC(trace = FALSE)
summary(m1)
##
## Call:
## glm(formula = PED_SHNT_1STPERM ~ PED_SHNT_HC_ETIOL, family = binomial(link = "logit"),
## data = train, na.action = "na.exclude")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.409e-06 2.409e-06 2.409e-06 2.409e-06 2.409e-06
##
## Coefficients:
## Estimate
## (Intercept) 2.657e+01
## PED_SHNT_HC_ETIOLIntracranial cyst 1.805e-07
## PED_SHNT_HC_ETIOLIntraventricular hemorrhage (IVH) of prematurity without other cause 1.927e-07
## PED_SHNT_HC_ETIOLNeoplastic 2.474e-09
## PED_SHNT_HC_ETIOLNULL -5.313e+01
## PED_SHNT_HC_ETIOLOther - insufficient information to classify type -4.629e-06
## PED_SHNT_HC_ETIOLPost hemorrhagic from other vascular lesion -4.629e-06
## PED_SHNT_HC_ETIOLPost infectious -4.629e-06
## PED_SHNT_HC_ETIOLPost traumatic -4.629e-06
## PED_SHNT_HC_ETIOLSpina bifida with Chiari malformation / myelomeningocele -4.629e-06
## PED_SHNT_HC_ETIOLSyndromic, not otherwise specified -4.629e-06
## PED_SHNT_HC_ETIOLVascular anomaly -4.629e-06
## Std. Error
## (Intercept) 1.829e+04
## PED_SHNT_HC_ETIOLIntracranial cyst 4.476e+04
## PED_SHNT_HC_ETIOLIntraventricular hemorrhage (IVH) of prematurity without other cause 2.827e+04
## PED_SHNT_HC_ETIOLNeoplastic 3.651e+04
## PED_SHNT_HC_ETIOLNULL 2.597e+04
## PED_SHNT_HC_ETIOLOther - insufficient information to classify type 4.020e+04
## PED_SHNT_HC_ETIOLPost hemorrhagic from other vascular lesion 6.291e+04
## PED_SHNT_HC_ETIOLPost infectious 5.507e+04
## PED_SHNT_HC_ETIOLPost traumatic 6.974e+04
## PED_SHNT_HC_ETIOLSpina bifida with Chiari malformation / myelomeningocele 4.452e+04
## PED_SHNT_HC_ETIOLSyndromic, not otherwise specified 7.094e+04
## PED_SHNT_HC_ETIOLVascular anomaly 9.692e+04
## z value
## (Intercept) 0.001
## PED_SHNT_HC_ETIOLIntracranial cyst 0.000
## PED_SHNT_HC_ETIOLIntraventricular hemorrhage (IVH) of prematurity without other cause 0.000
## PED_SHNT_HC_ETIOLNeoplastic 0.000
## PED_SHNT_HC_ETIOLNULL -0.002
## PED_SHNT_HC_ETIOLOther - insufficient information to classify type 0.000
## PED_SHNT_HC_ETIOLPost hemorrhagic from other vascular lesion 0.000
## PED_SHNT_HC_ETIOLPost infectious 0.000
## PED_SHNT_HC_ETIOLPost traumatic 0.000
## PED_SHNT_HC_ETIOLSpina bifida with Chiari malformation / myelomeningocele 0.000
## PED_SHNT_HC_ETIOLSyndromic, not otherwise specified 0.000
## PED_SHNT_HC_ETIOLVascular anomaly 0.000
## Pr(>|z|)
## (Intercept) 0.999
## PED_SHNT_HC_ETIOLIntracranial cyst 1.000
## PED_SHNT_HC_ETIOLIntraventricular hemorrhage (IVH) of prematurity without other cause 1.000
## PED_SHNT_HC_ETIOLNeoplastic 1.000
## PED_SHNT_HC_ETIOLNULL 0.998
## PED_SHNT_HC_ETIOLOther - insufficient information to classify type 1.000
## PED_SHNT_HC_ETIOLPost hemorrhagic from other vascular lesion 1.000
## PED_SHNT_HC_ETIOLPost infectious 1.000
## PED_SHNT_HC_ETIOLPost traumatic 1.000
## PED_SHNT_HC_ETIOLSpina bifida with Chiari malformation / myelomeningocele 1.000
## PED_SHNT_HC_ETIOLSyndromic, not otherwise specified 1.000
## PED_SHNT_HC_ETIOLVascular anomaly 1.000
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1.7134e+03 on 1554 degrees of freedom
## Residual deviance: 9.0215e-09 on 1543 degrees of freedom
## AIC: 24
##
## Number of Fisher Scoring iterations: 25
plot(m1)
model_prob <- m1 %>% predict(test, type= "response")
predictions <- ifelse(model_prob > 0.5, "1","0")
mean(predictions == test$PED_SHNT_1STPERM) #77% accuracy
## [1] 1
sum(train$PED_SHNT_1STPERM);sum(test$PED_SHNT_1STPERM)
## [1] 1182
## [1] 510
redo logistic regression with continuously gestational age surgery
#No = 0, Yes = 1
dt = sort(sample(nrow(df_GA_surgery), nrow(df_GA_surgery)*.7))
train<-df_GA_surgery[dt,-1]
test<-df_GA_surgery[-dt,-1]
train$PED_SHNT_1STPERM <- ifelse(train$PED_SHNT_1STPERM=='No', 0, 1)
test$PED_SHNT_1STPERM <- ifelse(test$PED_SHNT_1STPERM=='No', 0, 1)
m1 <- glm(PED_SHNT_1STPERM ~ ., data = train, family = binomial(link='logit'), na.action="na.exclude") %>% stepAIC(trace = FALSE)
?stepAIC
summary(m1)
#plot(m1)
model_prob <- m1 %>% predict(test, type= "response")
predictions <- ifelse(model_prob > 0.5, "1","0")
mean(predictions == test$PED_SHNT_1STPERM) #91% accuracy
t.test for GA at birth and at surgery
t.test(df_GA_surgery$GESTATIONALAGE_BIRTH~df_GA_surgery$PED_SHNT_1STPERM)
t.test(df_GA_surgery$GESTATIONALAGE_SURGERY~df_GA_surgery$PED_SHNT_1STPERM)
stats with gestational age at surgery and at birth
ga <- df_GA_surgery[,c(10,11,13)]
x <- as.data.frame(table(ga[,2:3]))
ga_at_surgery <- x[1:25,]
ga_at_surgery$total <- x[26:50,3]+x[1:25,3]
names(ga_at_surgery)[names(ga_at_surgery) == 'Freq'] <- 'Failures'
ga_at_surgery$proportion_fail <- round(ga_at_surgery$Failures/ga_at_surgery$total,3)
ga_at_surgery <- ga_at_surgery[,-2]
x <- as.data.frame(table(ga[,-2]))
ga_at_birth <- x[1:20,]
ga_at_birth$total <- x[21:40,3]+x[1:20,3]
names(ga_at_birth )[names(ga_at_birth ) == 'Freq'] <- 'Failures'
ga_at_birth$proportion_fail <- round(ga_at_birth$Failures/ga_at_birth$total,3)
ga_at_birth <- ga_at_birth[,-2]
ggplot(data=as.data.frame(ga_at_birth), aes(x=GESTATIONALAGE_BIRTH, y=proportion_fail,fill=proportion_fail))+geom_bar(stat="identity") +
gghisto +theme(axis.text.x = element_text(size=10, angle=15)) + scale_y_continuous(labels = scales::percent)+scale_fill_gradient(low="mistyrose",high="lightcoral") +
theme(legend.position="none") + xlab("Weeks Gestation at Birth") + ylab("Proportion reporting \nshunt revision") +ggtitle("Proportions of Premature Birth \nAges Reporting Shunt Revisions ")
ggplot(data=as.data.frame(ga_at_surgery), aes(x=GESTATIONALAGE_SURGERY, y=proportion_fail,fill=proportion_fail))+geom_bar(stat="identity") +
gghisto +theme(axis.text.x = element_text(size=10, angle=15)) + scale_y_continuous(labels = scales::percent)+scale_fill_gradient(low="mistyrose",high="lightcoral") +
theme(legend.position="none") + xlab("Gestational Age at Surgery (weeks)") + ylab("Proportion reporting \nshunt revision") +ggtitle("Gestational Age by Proportional Shunt Failure")
elements <- list(
geom_jitter(alpha = 0.7, size=7, shape=21, width = 0.01, height = 0.01),
theme_classic(),
gghisto)
ggplot(data = ga, aes(x = PED_SHNT_1STPERM, y = GESTATIONALAGE_BIRTH)) +
geom_boxplot(outlier.colour="salmon", outlier.shape=16,
outlier.size=4, width=.4, fill=c("indianred3","cornflowerblue"), alpha=0.3) + xlab(" ") +
ylab("Gestational Age at \nBirth (weeks)") +
scale_x_discrete(labels=c("No" = "Reported Shunt \nFailure", "Yes" = "No Reported \nFailure")) + gghisto +ggtitle("Gestational Age by \nShunt Failure")
random forest
#sex feamle = 1 male = 0
set.seed(222)
df3 <- df19[,c(2:4,9,11:15)]
df3$`Failure within first year after surgery` <- ifelse(df3$`Failure within first year after surgery`=='y', 1, 0)
df3$Sex <- ifelse(df3$Sex=='F', 1, 0)
names(df3) <- make.names(names(df3))
df3$Hydro.Etiology.Categorized <- as.factor(df3$Hydro.Etiology.Categorized)
dt = sort(sample(nrow(df3), nrow(df3)*.8))
train<-df3[dt,]
test<-df3[-dt,]
rf <- randomForest(as.factor(Failure.within.first.year.after.surgery)~., data=df3, proximity=TRUE, na.action = na.roughfix)
print(rf)
#looks bad
below: closer examination of premature birth influence on shunt revision
df_premature0 <- df_rem_NULL19[,c(9:10)]
premature <- as.data.frame(table(df_premature0),stringsAsFactors=FALSE)
prem_no <- premature[1:9,]
prem_yes <- premature[10:18,]
df_premature <- prem_no
df_premature$first_shunt <- prem_yes$Freq
df_premature$multi_shunt <- prem_no$Freq
df_premature$total_cases <- df_premature$multi_shunt+df_premature$first_shunt
df_premature <- df_premature[,-c(2,3)]
df_premature$proportion_revised <- (df_premature$multi_shunt/df_premature$total_cases)
df_premature[8,1] <- "24 and fewer weeks gestation"
ggplot of premature birth versus shunt revisions 2019 data
ggplot(data=df_premature, aes(x=PREM_BIRTH, y=proportion_revised,fill=proportion_revised))+geom_bar(stat="identity")+
scale_x_discrete(labels=c("<24 weeks","24 weeks","25-26 weeks","27-28 weeks","29-30 weeks","31-32 weeks","33-34 weeks","35-36 weeks","37+ weeks`")) +
gghisto +theme(axis.text.x = element_text(size=10, angle=15)) + scale_y_continuous(labels = scales::percent)+scale_fill_gradient(low="mistyrose",high="lightcoral") +
theme(legend.position="none") + xlab("Weeks Gestation at Birth") + ylab("Proportion reporting \nshunt revision") +ggtitle("Proportions of Premature Birth \nCategories Reporting Shunt Revisions ")
prem_table <- table(df_rem_NULL19$PREM_BIRTH, df_rem_NULL19$PED_SHNT_1STPERM)
chisq.test(prem_table , correct=FALSE)
fisher.test(prem_table, simulate.p.value = TRUE)
library(chisq.posthoc.test)
chisq.posthoc.test(prem_table)
proportions test
case.vector <- df_premature$multi_shunt
names(case.vector) <- df_premature$PREM_BIRTH
total.vector <- df_premature$total_cases
names(total.vector) <- df_premature$PREM_BIRTH
prop.test(case.vector,total.vector)
prop.trend.test(case.vector,total.vector)
total shunt “failure”
sum(df_premature$multi_shunt)/sum(df_premature$total_cases)
sum(df_premature$first_shunt)
preliminary findings: mode of birth predicts needing a second shunt. days from admissions to operation are related to shunt revision. premature birth signficantly related to first or second shunt.
#summary of ALL shunts
tbl_summary(df_GA_surgery[,c(2:6,8:13)],label = `PED_SHNT_1STPERM` ~ "First Shunt",by=`PED_SHNT_1STPERM`)%>%
bold_labels() %>%add_p()
#summary removing specified null values
tbl_summary(df_rem_NULL[,c(2:21)],label = `PED_SHNT_1STPERM` ~ "First Shunt",by=`PED_SHNT_1STPERM`)%>%
bold_labels() %>%add_p()
#summary of first time shunts, characterized by surgical adjuncts
#tbl_summary(df_first[,c(2:5,7:16)], by=PED_SHNT_SURGADJS)%>%
#bold_labels() %>%add_p()