Predictive Insights into Diabetic Foot Amputation

This analysis uses electronic medical records from 434 patients with Diabetic Foot Infection (DFI), collected in a tertiary care hospital in Central Malaysia, between January 2010 and December 2019. The objective of this research was to investigate the predictive factors associated with amputation in DFI patients. The information extracted from clinical records included demographics, clinical characteristics (inflammatory markers and clinical complications), and amputation details. Binary logistic regression was performed to identify significant predictors of major vs minor amputation.

Socio-demographic characteristics

dia <- read_excel("DFU census - Parichehr.xlsx")
names(dia) <- tolower(names(dia))
names(dia)

##  [1] "name"                            "gender"                         
##  [3] "race"                            "age"                            
##  [5] "admission date"                  "discharge date"                 
##  [7] "wound"                           "wagner cl"                      
##  [9] "esr"                             "crp"                            
## [11] "wbc"                             "amputation"                     
## [13] "organism"                        "sensitivity"                    
## [15] "abx"                             "specify abx"                    
## [17] "recurrent infection"             "smoke"                          
## [19] "peripheral vascular diseas(pvd)" "peripheral neuropathy"          
## [21] "osteomyelitis"                   "gangerene"

#Gender
dia$gender <- tolower(dia$gender)
dia$gender <- as.factor(dia$gender)
prop.table(table(dia$gender))*100

## 
##   female     male 
## 37.09677 62.90323

#Ethnicity
dia$race <- tolower(dia$race)
dia$race[dia$race=="india"] <- "indian"
dia$race[dia$race=="indian muslin"] <- "indian"
dia$race[dia$race=="indian muslim"] <- "indian"
dia$race[dia$race=="cina"] <- "chinese"
dia$race[dia$race=="indonesian"] <- "other"
dia$race[dia$race=="indonesian pr"] <- "other"
dia$race[dia$race=="myanmar"] <- "other"
dia$race[dia$race=="swedish"] <- "other"
dia$race <- as.factor(dia$race)
prop.table(table(dia$race))*100

## 
##   chinese    indian     malay     other 
## 20.967742 14.976959 61.751152  2.304147

#Age
dia$groups[dia$age<41] <- "1. 18-40"
dia$groups[dia$age>40 & dia$age<66] <- "2. 41-65"
dia$groups[dia$age>65] <- "3. >65"
prop.table(table(dia$groups))*100

## 
## 1. 18-40 2. 41-65   3. >65 
##  4.83871 55.99078 39.17051

#Smoking status
dia$smoke <- tolower(dia$smoke)
dia$smoke[dia$smoke=="ex smoker"|dia$smoke=="no (ex smoker)"| dia$smoke=="no(ex smoker)" |dia$smoke=="non(ex smoker)"] <- "3. ex-smoker" 
dia$smoke[dia$smoke=="no smoke"|dia$smoke=="non"|dia$smoke=="no"] <- "1. non-smoker"
dia$smoke[dia$smoke=="yes smoker"|dia$smoke=="yes"|dia$smoke=="yes smoke"|dia$smoke=="yes(smoke)"|dia$smoke=="smoker"] <- "2. smoker"
dia$smoke <- as.factor(dia$smoke)
prop.table(table(dia$smoke))*100

## 
## 1. non-smoker     2. smoker  3. ex-smoker 
##      61.24661      26.55827      12.19512

Indicators of inflammation

#WBC
dia$wbc <- as.numeric(dia$wbc)
dia$wbcgroups[dia$wbc<4.0] <- "1. < 4 abnormal"
dia$wbcgroups[dia$wbc>3.9 & dia$wbc<11.1] <- "2. normal"
dia$wbcgroups[dia$wbc>11.0] <- "3. > 11 abnormal"
prop.table(table(dia$wbcgroups))*100

## 
##  1. < 4 abnormal        2. normal 3. > 11 abnormal 
##        0.2320186       17.6334107       82.1345708

describe(dia$wbc)

##    vars   n  mean   sd median trimmed  mad min  max range skew kurtosis   se
## X1    1 431 17.68 7.54   16.4   16.96 6.67 2.5 64.1  61.6 1.42     4.35 0.36

#CRP
dia$crp <- as.numeric(dia$crp)
dia$crpgroup[dia$crp>5] <- "1. abnormal"
table(dia$crpgroup)

## 
## 1. abnormal 
##         228

describe(dia$crp)

##    vars   n   mean     sd median trimmed    mad  min    max  range skew
## X1    1 228 170.33 109.97 150.09  162.14 124.97 9.76 490.99 481.23 0.58
##    kurtosis   se
## X1    -0.35 7.28

#ESR
dia$esr <- as.numeric(dia$esr)
describe(dia$esr)

##    vars   n  mean    sd median trimmed   mad min max range  skew kurtosis   se
## X1    1 262 96.98 28.18    103  101.13 22.24   2 229   227 -0.96     3.16 1.74

Complications from diabetes

#Wagner score
dia$wagner <- as.numeric(dia$`wagner cl`)
dia$wagner[dia$wagner==6] <- 5
prop.table(table(dia$wagner))*100

## 
##         1         2         3         4         5 
##  2.102804 17.990654 15.420561 40.420561 24.065421

#Amputation type
dia$amputation <- tolower(dia$amputation)
dia$amputation[dia$amputation=="major"] <- "2.major"
dia$amputation[dia$amputation=="minor"] <- "1.minor"
dia$amputation <- as.factor(dia$amputation)
prop.table(table(dia$amputation))*100

## 
##  1.minor  2.major 
## 70.73733 29.26267

#Gangrene
dia$gangerene <- tolower(dia$gangerene)
dia$gangyes <- str_extract(dia$gangerene, "yes")
dia$gangno <- str_extract(dia$gangerene, "no")
dia$gang <- paste(dia$gangyes, dia$gangno)
dia$gang <- str_remove(dia$gang, "NA")
dia$gang <- str_trim(dia$gang)
dia$gang <- as.factor(dia$gang)
dia$gang[dia$gang=="NA"] <- NA
dia$gang <- droplevels(dia$gang)
prop.table(table(dia$gang))*100

## 
##      no     yes 
## 33.5689 66.4311

#Peripheral neuropathy
dia$`peripheral neuropathy` <- tolower(dia$`peripheral neuropathy`)
dia$pn <- ifelse(is.na(dia$`peripheral neuropathy`), "no", "yes")
dia$pn <- as.factor(dia$pn)
prop.table(table(dia$pn))*100

## 
##       no      yes 
## 74.65438 25.34562

#Osteomyelitis
dia$osteomyelitis <- tolower(dia$osteomyelitis)
dia$osteoyes <- str_extract(dia$osteomyelitis, "yes")
dia$osteono <- str_extract(dia$osteomyelitis, "no")
dia$osteo <- paste(dia$osteoyes, dia$osteono)
dia$osteo <- str_remove(dia$osteo, "NA")
dia$osteo <- str_trim(dia$osteo)
dia$osteo <- as.factor(dia$osteo)
dia$osteo[dia$osteo=="NA"] <- NA
dia$osteo <- droplevels(dia$osteo)
prop.table(table(dia$osteo))*100

## 
##       no      yes 
## 35.76389 64.23611

# Peripheral vascular disease 
dia$pvd <- tolower(dia$`peripheral vascular diseas(pvd)`)
dia$pvd <- str_extract(dia$pvd, "yes")
table(dia$pvd)

## 
## yes 
##  60

Bivariate tests

Chi-square tests

options(scipen=999)

age <- table(dia$groups,dia$amputation)
round(prop.table(age, margin = 1) * 100, 1)

##           
##            1.minor 2.major
##   1. 18-40    85.7    14.3
##   2. 41-65    75.7    24.3
##   3. >65      61.8    38.2

chisq.test(age)

## 
##  Pearson's Chi-squared test
## 
## data:  age
## X-squared = 11.802, df = 2, p-value = 0.002736

sex <- table(dia$gender,dia$amputation)
round(prop.table(sex, margin = 1) * 100, 1)

##         
##          1.minor 2.major
##   female    70.2    29.8
##   male      71.1    28.9

chisq.test(sex)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  sex
## X-squared = 0.0071479, df = 1, p-value = 0.9326

ethnicity <- table(dia$race,dia$amputation)
round(prop.table(ethnicity, margin = 1) * 100, 1)

##          
##           1.minor 2.major
##   chinese    69.2    30.8
##   indian     76.9    23.1
##   malay      68.7    31.3
##   other     100.0     0.0

chisq.test(ethnicity)

## Warning in chisq.test(ethnicity): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  ethnicity
## X-squared = 5.9986, df = 3, p-value = 0.1117

smoking <- table(dia$smoke,dia$amputation)
round(prop.table(smoking, margin = 1) * 100, 1)

##                
##                 1.minor 2.major
##   1. non-smoker    70.4    29.6
##   2. smoker        79.6    20.4
##   3. ex-smoker     62.2    37.8

chisq.test(smoking)

## 
##  Pearson's Chi-squared test
## 
## data:  smoking
## X-squared = 5.2124, df = 2, p-value = 0.07381

Mann-Whitney tests

#Normality test
shapiro.test(dia$wbc)

## 
##  Shapiro-Wilk normality test
## 
## data:  dia$wbc
## W = 0.91833, p-value = 0.00000000000001575

shapiro.test(dia$crp)

## 
##  Shapiro-Wilk normality test
## 
## data:  dia$crp
## W = 0.95663, p-value = 0.000002224

shapiro.test(dia$esr)

## 
##  Shapiro-Wilk normality test
## 
## data:  dia$esr
## W = 0.85903, p-value = 0.000000000000009671

# WBC
dia %>%
  group_by(amputation) %>%
  summarise(
    median_wbc = median(wbc, na.rm = TRUE),
    IQR_wbc = IQR(wbc, na.rm = TRUE)
  )

## # A tibble: 2 × 3
##   amputation median_wbc IQR_wbc
##   <fct>           <dbl>   <dbl>
## 1 1.minor          15.5    9.22
## 2 2.major          18      9.15

wilcox.test(wbc ~ amputation, data = dia)

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  wbc by amputation
## W = 14708, p-value = 0.00009677
## alternative hypothesis: true location shift is not equal to 0

#  CRP
dia %>%
  group_by(amputation) %>%
  summarise(
    median_crp = median(crp, na.rm = TRUE),
    IQR_crp = IQR(crp, na.rm = TRUE)
  )

## # A tibble: 2 × 3
##   amputation median_crp IQR_crp
##   <fct>           <dbl>   <dbl>
## 1 1.minor          139.    172.
## 2 2.major          191.    123.

wilcox.test(crp ~ amputation, data = dia)

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  crp by amputation
## W = 4095, p-value = 0.002383
## alternative hypothesis: true location shift is not equal to 0

#  ESR
dia %>%
  group_by(amputation) %>%
  summarise(
    median_esr = median(esr, na.rm = TRUE),
    IQR_esr = IQR(esr, na.rm = TRUE)
  )

## # A tibble: 2 × 3
##   amputation median_esr IQR_esr
##   <fct>           <dbl>   <dbl>
## 1 1.minor           101      28
## 2 2.major           108      29

wilcox.test(esr ~ amputation, data = dia)

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  esr by amputation
## W = 6204.5, p-value = 0.02136
## alternative hypothesis: true location shift is not equal to 0

Z-tests for two proportions

#Complications

#Function to perform two-proportion z-test manually
z_test_proportion <- function(x1, n1, x2, n2) {
  p1 <- x1 / n1
  p2 <- x2 / n2
  p_pool <- (x1 + x2) / (n1 + n2)
  se <- sqrt(p_pool * (1 - p_pool) * (1/n1 + 1/n2))
  z <- (p1 - p2) / se
  p_value <- 2 * (1 - pnorm(abs(z)))  # two-tailed
  return(c(z_statistic = z, p_value = p_value))
}

#complications <- c("Gangrene", "Peripheral Neuropathy", "Osteomyelitis", "PVD")
minor_c <- c(133, 87, 139, 36)
major_c <- c(55, 23, 46, 24)

#wagner <- c("Grade1", "Grade2", "Grade3", "Grade4", "Grade5")
minor_w <- c(8, 67, 64, 161, 6)
major_w <- c(1, 10, 2, 12, 97)

# Total cases
total_c <- minor_c + major_c
total_w <- minor_w + major_w

complications <- t(mapply(
  z_test_proportion,
  x1 = minor_c,
  n1 = total_c,
  x2 = major_c,
  n2 = total_c
))

wagner <- t(mapply(
  z_test_proportion,
  x1 = minor_w,
  n1 = total_w,
  x2 = major_w,
  n2 = total_w
))

print(complications)

##      z_statistic                  p_value
## [1,]    8.045086 0.0000000000000008881784
## [2,]    8.629758 0.0000000000000000000000
## [3,]    9.669680 0.0000000000000000000000
## [4,]    2.190890 0.0284597369163106517220

print(wagner)

##      z_statistic      p_value
## [1,]    3.299832 0.0009674285
## [2,]    9.186382 0.0000000000
## [3,]   10.792815 0.0000000000
## [4,]   16.020579 0.0000000000
## [5,]  -12.680541 0.0000000000

Plots

# Create the data frame
df <- data.frame(
  Variable = c(
    rep("Age Group", 3),
    rep("Sex", 2),
    rep("Ethnicity", 4),
    rep("Smoking Status", 3),
    rep("Wagner Grade", 5)
  ),
  Level = c(
    "18–40 years", "41–65 years", ">65 years",
    "Female", "Male",
    "Chinese", "Indian", "Malay", "Other",
    "Smoker", "Non-Smoker", "Ex-Smoker",
    "Grade 1", "Grade 2", "Grade 3", "Grade 4", "Grade 5"
  ),
  Minor = c(
    5.9, 59.9, 34.2,
    36.8, 63.2,
    20.5, 16.3, 59.9, 3.3,
    29.4, 60.0, 10.6,
    2.6, 21.8, 20.8, 52.4, 2.0
  ),
  Major = c(
    2.4, 46.5, 51.2,
    37.8, 62.2,
    22.0, 11.8, 66.1, 0.0,
    19.2, 64.4, 16.3,
    0.8, 8.2, 1.6, 9.8, 79.5
  )
)

# Convert to long format
df_long <- df %>%
  pivot_longer(cols = c(Minor, Major), names_to = "AmputationType", values_to = "Percentage")

# Plot
ggplot(df_long, aes(x = Percentage, y = reorder(Level, Percentage), fill = AmputationType)) +
  geom_col(position = "dodge") +
  facet_grid(Variable ~ ., scales = "free_y", space = "free_y") +
  labs(
    title = "Percentage Distribution by Amputation Type",
    x = "Percentage",
    y = NULL
  ) +
  theme_minimal() +
  theme(strip.text.y = element_text(angle = 0))

# Convert relevant columns
dia <-dia %>%
  mutate(
    wagner = as.numeric(wagner),
    gang = ifelse(gang == "yes", 1, 0),
    pn = ifelse(pn == "yes", 1, 0),
    pvd = ifelse(pvd == "yes", 1, 0),
    osteo = ifelse(osteo == "yes", 1, 0)
  )

# Drop rows with NA in any of the key columns
df_clean <- dia %>% drop_na(wagner, gang, pn, pvd, osteo)

# Calculate percentage of "yes" per wagner score
percent_yes <- df_clean %>%
  group_by(wagner) %>%
  summarise(
    Gangrene = mean(gang) * 100,
    "Peripheral Neuropathy" = mean(pn) * 100,
    "Peripheral Vascular Disease" = mean(pvd) * 100,
    Osteomyelitis = mean(osteo) * 100
  ) %>%
  pivot_longer(cols = -wagner, names_to = "Condition", values_to = "Percentage")

# Plot horizontal grouped bar chart
ggplot(percent_yes, aes(x = Percentage, y = factor(wagner), fill = Condition)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  labs(
    title = 'Complications by Wagner Score (%)',
    x = 'Percentage',
    y = 'Wagner Score'
  ) +
  theme_minimal() +
  scale_fill_brewer(palette = "RdBu")

Binary logistic regression

model <- glm(dia$amputation ~ age + wbc + crp + esr + wagner + gang + osteo + pn, data = dia, family = binomial)

summary(model)

## 
## Call:
## glm(formula = dia$amputation ~ age + wbc + crp + esr + wagner + 
##     gang + osteo + pn, family = binomial, data = dia)
## 
## Coefficients:
##               Estimate Std. Error z value   Pr(>|z|)    
## (Intercept) -14.346284   3.279483  -4.375 0.00001217 ***
## age           0.049203   0.026520   1.855     0.0636 .  
## wbc           0.053280   0.057503   0.927     0.3541    
## crp           0.002147   0.002893   0.742     0.4581    
## esr          -0.004039   0.012473  -0.324     0.7461    
## wagner        2.298127   0.516558   4.449 0.00000863 ***
## gang         -0.447574   0.588782  -0.760     0.4472    
## osteo         0.156431   0.574457   0.272     0.7854    
## pn           -0.416545   0.677767  -0.615     0.5388    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 131.473  on 105  degrees of freedom
## Residual deviance:  84.477  on  97  degrees of freedom
##   (328 observations deleted due to missingness)
## AIC: 102.48
## 
## Number of Fisher Scoring iterations: 6

exp(cbind(OR = coef(model), confint(model)))

## Waiting for profiling to be done...

##                          OR              2.5 %        97.5 %
## (Intercept) 0.0000005881498 0.0000000004713923  0.0002056549
## age         1.0504332785908 0.9988766316505393  1.1096442099
## wbc         1.0547251567447 0.9416467650675273  1.1833735612
## crp         1.0021491809180 0.9965115889449355  1.0080154545
## esr         0.9959688933376 0.9718048924458729  1.0216973050
## wagner      9.9555193742809 3.9869306514484539 30.3602414902
## gang        0.6391769319957 0.1967082830067373  2.0398443086
## osteo       1.1693302421755 0.3794171205347890  3.7083218669
## pn          0.6593208102439 0.1640577850167579  2.4395858215