# Packages
library(tidyverse)
library(dplyr)
library(purrr)
library(readr)
library(ggplot2)
library(ggforce)
library(broom)
library(forcats)
library(corrplot)
library(lubridate)
library(reshape2)
# Load in the Data
SSI_Raw <- read_csv("SSI_Raw.csv",
col_types = cols(id = col_character(), Age = col_number(),
SmokHx = col_character(), Dx = col_character(),
Surgeon = col_character(), Location = col_character(),
Defect_Size = col_number(), Closure = col_character(),
Primary_Closure = col_number(), `Flaps/Grafts _cm2` = col_number()))
# Convert column names to lowercase
colnames(SSI_Raw) <- tolower(colnames(SSI_Raw))
# Convert string values to lowercase
SSI_Clean <- SSI_Raw %>%
mutate(across(where(is.character), tolower))
# Trim the trailing white spaces
SSI_Clean <- SSI_Clean %>%
mutate(across(where(is.character), str_trim))
#Converting the variables
SSI_Clean <- SSI_Clean %>%
mutate(
sex = dplyr::recode(sex,
`0` = "male",
`1` = "female",
.default = "NA"),
smokhx = recode(smokhx,
`0` = "never",
`1` = "current",
`2` = "former",
.default = "NA"),
dm_hx = recode(dm_hx,
`0` = "no",
`1` = "yes",
`2` = "yes", #T1DM
.default = "NA"),
immuno_hx = recode(immuno_hx,
`0` = "no",
`1` = "yes",
`2` = "yes",
.default = "NA"),
ppx_abx = recode(ppx_abx,
`0` = "no",
`1` = "yes",
`2` = "yes",
.default = "NA"),
site = recode(site,
#Oddities
"hands" = "hand",
"feet" = "foot",
"ears" = "ear",
"lips" = "lip",
"2" = "face"),
dx = recode(dx,
`1` = "SCC",
`2` = "BCC",
`3` = "Melanoma",
`4` = "Other_Malignancies",
`5` = "Lipomas",
`6` = "Cysts",
`7` = "Nevi",
`8` = "Other_Non-Malignancies",
.default = "NA"),
surgery = recode(surgery,
`0` = "Mohs",
`1` = "Excision",
.default = "NA"),
surgeon = recode(surgeon,
`1` = "Mohs_Surgeon",
`2` = "Non-Mohs_Derm",
`3` = "Plastics",
`4` = "ENT",
`5` = "Family_Med",
`6` = "Internal_Med",
`7` = "Surgical_Onc",
`8` = "OBGYN",
`9` = "Combined_Mohs +",
`10` = "Other",
.default = "NA"),
location = recode(location,
`0` = "inpatient",
`1` = "outpatient",
`2` = "mohs_out_+_OR",
.default = "NA"),
closure = recode(closure,
`1` = "Primary",
`2` = "Secondary",
`3` = "Flap",
`4` = "Flap",
`5` = "Flap",
`6` = "Flap",
`7` = "Graft",
`8` = "Other",
.default = "NA"),
ssi = recode(ssi,
`0` = "no",
`1` = "yes",
.default = "NA"))
# Removed Other closure types
SSI_Clean <- SSI_Clean |>
filter(!closure == "Other")
SSI_Clean <- SSI_Clean |>
select(-c(primary_closure, `flaps/grafts _cm2`))
SSI_Clean_Unbinned <- SSI_Clean
# Binning Continuous Data
SSI_Clean <- SSI_Clean %>%
mutate(
age = case_when(
age >= 19 & age <= 59 ~ "<60",
age >= 60 & age <= 69 ~ "60-69",
age >= 70 & age <= 79 ~ "70-79",
age >= 80 ~ "80+",
TRUE ~ NA_character_),
defect_size = case_when(
defect_size <= 1 ~ "<1.00_cm",
defect_size >= 1 & defect_size < 1.50 ~ "1.00-1.49_cm",
defect_size >= 1.50 & defect_size < 2.0 ~ "1.50-1.99_cm",
defect_size >= 2 & defect_size < 2.5 ~ "2.00-2.49_cm",
defect_size >= 2.5 ~ "≥2.50_cm",
TRUE ~ NA_character_
))
str(SSI_Clean_Unbinned)
## tibble [2,435 × 15] (S3: tbl_df/tbl/data.frame)
## $ id : num [1:2435] 2 4 5 7 8 9 10 11 12 13 ...
## $ age : num [1:2435] 81 67 67 64 71 61 60 72 69 70 ...
## $ sex : chr [1:2435] "female" "male" "male" "female" ...
## $ smokhx : chr [1:2435] "former" "former" "former" "never" ...
## $ dm_hx : chr [1:2435] "yes" "no" "no" "no" ...
## $ immuno_hx : chr [1:2435] "yes" "yes" "yes" "no" ...
## $ ppx_abx : chr [1:2435] "no" "no" "yes" "no" ...
## $ site : chr [1:2435] "face" "face" "groin" "ue" ...
## $ dx : chr [1:2435] "BCC" "SCC" "SCC" "Cysts" ...
## $ surgery : chr [1:2435] "Mohs" "Mohs" "Mohs" "Excision" ...
## $ surgeon : chr [1:2435] "Mohs_Surgeon" "Mohs_Surgeon" "Mohs_Surgeon" "Plastics" ...
## $ location : chr [1:2435] "outpatient" "outpatient" "outpatient" "inpatient" ...
## $ defect_size: num [1:2435] 1.5 2 4 1 1 2 3 1.5 2.5 0.5 ...
## $ closure : chr [1:2435] "Primary" "Primary" "Graft" "Primary" ...
## $ ssi : chr [1:2435] "no" "no" "no" "no" ...
unique(SSI_Clean_Unbinned$defect_size)
## [1] 1.5 2.0 4.0 1.0 3.0 2.5 0.5 1.1 2.7 2.3 0.8 0.4 0.7 1.2 1.8
## [16] 5.5 0.6 1.6 2.1 2.6 2.2 1.9 2.8 3.5 6.0 3.8 1.4 1.7 1.3 4.5
## [31] 0.9 2.4 3.2 8.0 0.2 3.1 4.1 3.7 5.0 3.3 2.9 5.1 3.6 4.6 6.5
## [46] 7.5 3.4 9.0 6.8 4.8 11.0 12.0 10.0 5.6 4.2 5.3 7.0 6.1 6.7 10.5
## [61] 7.2 18.0 4.7 0.0 0.3 4.3 13.7 5.2 4.9 3.9 5.4 36.0 38.0 5.7 6.2
unique(SSI_Clean_Unbinned$closure)
## [1] "Primary" "Graft" "Secondary" "Flap"
SSI_Clean_Unbinned <- SSI_Clean_Unbinned |>
filter(!defect_size >8)
SSI_Clean_Unbinned$closure <- factor(SSI_Clean_Unbinned$closure,
levels = c("Primary",
"Secondary",
"Flap",
"Graft"))
ggplot(SSI_Clean_Unbinned, aes(x = defect_size, fill = closure)) +
geom_histogram(position = "identity", alpha = 1, binwidth = .3) +
theme_minimal() +
xlim(0, 8) +
scale_fill_viridis_d(option = "plasma", direction = 1) +
labs(title = "Distribution of Defect Size by Closure Technique",
x = "Defect Size (cm)",
y = "Number of Cases",
fill = "Closure")
## Warning: Removed 8 rows containing missing values (`geom_bar()`).
Demographic Frequency
library(dplyr)
library(tidyr)
# Define the columns to be evaluated
columns_to_evaluate <- c("age", "sex", "smokhx", "dm_hx", "immuno_hx", "ppx_abx", "site", "dx", "surgeon", "location", "defect_size", "ssi")
# Reshape the data to a long format
long_df <- SSI_Clean %>%
select(all_of(columns_to_evaluate), closure) %>%
pivot_longer(cols = -closure, names_to = "variable", values_to = "level")
# Calculate frequencies for each closure level
freq_table_closure <- long_df %>%
group_by(variable, level, closure) %>%
summarise(n = n()) %>%
pivot_wider(names_from = closure, values_from = n, names_prefix = "n_", names_sep = "")
# Calculate closure totals for each level
closure_totals <- long_df %>%
group_by(variable, level, closure) %>%
summarise(Total_per_closure = n())
# Calculate percentages within each closure group
percent_table_closure <- long_df %>%
group_by(variable, closure) %>%
summarise(Total_per_closure_group = n()) %>%
left_join(closure_totals, by = c("variable", "closure")) %>%
mutate(percent = Total_per_closure / Total_per_closure_group * 100) %>%
pivot_wider(names_from = closure, values_from = percent, names_prefix = "percent_", names_sep = "") %>%
select(-Total_per_closure, -Total_per_closure_group)
# Join the count and percentage tables
closure_combined <- left_join(freq_table_closure, percent_table_closure, by = c("variable", "level"))
# Calculate total frequencies and percentages for each variable level
freq_table_total <- long_df %>%
group_by(variable, level) %>%
summarise(Total_n = n(),
Total_Percent = Total_n / sum(Total_n) * 100)
# Join the tables to form the final frequency table
final_freq_table <- left_join(freq_table_total, closure_combined, by = c("variable", "level"))
# View the final frequency table
print(final_freq_table)
## # A tibble: 186 × 12
## # Groups: variable [12]
## variable level Total_n Total_Percent n_Flap n_Graft n_Primary n_Secondary
## <chr> <chr> <int> <dbl> <int> <int> <int> <int>
## 1 age 60-69 741 100 75 31 490 145
## 2 age 60-69 741 100 75 31 490 145
## 3 age 60-69 741 100 75 31 490 145
## 4 age 60-69 741 100 75 31 490 145
## 5 age 70-79 708 100 96 28 414 170
## 6 age 70-79 708 100 96 28 414 170
## 7 age 70-79 708 100 96 28 414 170
## 8 age 70-79 708 100 96 28 414 170
## 9 age 80+ 450 100 70 24 235 121
## 10 age 80+ 450 100 70 24 235 121
## # ℹ 176 more rows
## # ℹ 4 more variables: percent_Flap <dbl>, percent_Graft <dbl>,
## # percent_Primary <dbl>, percent_Secondary <dbl>
SSI Frequency
# Define the columns to be evaluated
columns_to_evaluate <- c("age", "sex", "smokhx", "dm_hx", "immuno_hx", "ppx_abx", "site", "dx", "surgeon", "location", "surgery", "defect_size", "closure")
# Reshape the data to a long format
long_df <- SSI_Clean %>%
select(all_of(columns_to_evaluate), ssi) %>%
pivot_longer(cols = -ssi, names_to = "variable", values_to = "level")
# Calculate frequencies for each closure level
freq_table_ssi <- long_df %>%
group_by(variable, level, ssi) %>%
count(name = "n") %>%
ungroup()
# Make the new table of yes/no
freq_table_ssi_yes <- freq_table_ssi |>
filter(ssi == "yes")
freq_table_ssi_no <- freq_table_ssi |>
filter(ssi == "no")
freq_table_ssi <- left_join(freq_table_ssi_yes, freq_table_ssi_no, by = c("variable", "level")) |>
select(-c(ssi.x, ssi.y))
# Chi-Square Data
SSI_Chi_Square <- SSI_Clean
# Make all values factors
SSI_Chi_Square[] <- lapply(SSI_Chi_Square, as.factor)
# Chi-Square Analysis
# Create an empty data frame to store results
chi_square_results <- data.frame(
Variable = character(),
Chi_Square = numeric(),
Degrees_of_Freedom = numeric(),
P_Value = numeric(),
stringsAsFactors = FALSE
)
# Loop through each column in the data frame
for (col_name in colnames(SSI_Chi_Square)) {
cat("Processing:", col_name, "\n")
# Skip the 'ssi' and 'id' columns
if (col_name != 'ssi' && col_name != 'id') {
# Convert columns to factors for chi-square test
temp_col <- as.factor(SSI_Chi_Square[[col_name]])
temp_ssi <- as.factor(SSI_Chi_Square$ssi)
# Perform the chi-square test
test_result <- chisq.test(temp_col, temp_ssi)
print(test_result)
# Store the results in the data frame
chi_square_results <- rbind(
chi_square_results,
data.frame(
Variable = col_name,
Chi_Square = test_result$statistic,
Degrees_of_Freedom = test_result$parameter,
P_Value = test_result$p.value,
stringsAsFactors = FALSE
)
)
print(chi_square_results)
}
}
## Processing: id
## Processing: age
##
## Pearson's Chi-squared test
##
## data: temp_col and temp_ssi
## X-squared = 4.2815, df = 3, p-value = 0.2326
##
## Variable Chi_Square Degrees_of_Freedom P_Value
## X-squared age 4.28154 3 0.2326239
## Processing: sex
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: temp_col and temp_ssi
## X-squared = 3.4375, df = 1, p-value = 0.06373
##
## Variable Chi_Square Degrees_of_Freedom P_Value
## X-squared age 4.281540 3 0.23262390
## X-squared1 sex 3.437466 1 0.06373336
## Processing: smokhx
##
## Pearson's Chi-squared test
##
## data: temp_col and temp_ssi
## X-squared = 2.503, df = 2, p-value = 0.2861
##
## Variable Chi_Square Degrees_of_Freedom P_Value
## X-squared age 4.281540 3 0.23262390
## X-squared1 sex 3.437466 1 0.06373336
## X-squared2 smokhx 2.503042 2 0.28606935
## Processing: dm_hx
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: temp_col and temp_ssi
## X-squared = 1.7904, df = 1, p-value = 0.1809
##
## Variable Chi_Square Degrees_of_Freedom P_Value
## X-squared age 4.281540 3 0.23262390
## X-squared1 sex 3.437466 1 0.06373336
## X-squared2 smokhx 2.503042 2 0.28606935
## X-squared3 dm_hx 1.790356 1 0.18088280
## Processing: immuno_hx
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: temp_col and temp_ssi
## X-squared = 0.0017427, df = 1, p-value = 0.9667
##
## Variable Chi_Square Degrees_of_Freedom P_Value
## X-squared age 4.281540209 3 0.23262390
## X-squared1 sex 3.437466262 1 0.06373336
## X-squared2 smokhx 2.503042038 2 0.28606935
## X-squared3 dm_hx 1.790355940 1 0.18088280
## X-squared4 immuno_hx 0.001742664 1 0.96670180
## Processing: ppx_abx
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: temp_col and temp_ssi
## X-squared = 3.6197e-28, df = 1, p-value = 1
##
## Variable Chi_Square Degrees_of_Freedom P_Value
## X-squared age 4.281540e+00 3 0.23262390
## X-squared1 sex 3.437466e+00 1 0.06373336
## X-squared2 smokhx 2.503042e+00 2 0.28606935
## X-squared3 dm_hx 1.790356e+00 1 0.18088280
## X-squared4 immuno_hx 1.742664e-03 1 0.96670180
## X-squared5 ppx_abx 3.619715e-28 1 1.00000000
## Processing: site
##
## Pearson's Chi-squared test
##
## data: temp_col and temp_ssi
## X-squared = 34.699, df = 11, p-value = 0.0002778
##
## Variable Chi_Square Degrees_of_Freedom P_Value
## X-squared age 4.281540e+00 3 0.232623905
## X-squared1 sex 3.437466e+00 1 0.063733363
## X-squared2 smokhx 2.503042e+00 2 0.286069349
## X-squared3 dm_hx 1.790356e+00 1 0.180882801
## X-squared4 immuno_hx 1.742664e-03 1 0.966701798
## X-squared5 ppx_abx 3.619715e-28 1 1.000000000
## X-squared6 site 3.469900e+01 11 0.000277771
## Processing: dx
##
## Pearson's Chi-squared test
##
## data: temp_col and temp_ssi
## X-squared = 15.18, df = 7, p-value = 0.03376
##
## Variable Chi_Square Degrees_of_Freedom P_Value
## X-squared age 4.281540e+00 3 0.232623905
## X-squared1 sex 3.437466e+00 1 0.063733363
## X-squared2 smokhx 2.503042e+00 2 0.286069349
## X-squared3 dm_hx 1.790356e+00 1 0.180882801
## X-squared4 immuno_hx 1.742664e-03 1 0.966701798
## X-squared5 ppx_abx 3.619715e-28 1 1.000000000
## X-squared6 site 3.469900e+01 11 0.000277771
## X-squared7 dx 1.518019e+01 7 0.033757704
## Processing: surgery
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: temp_col and temp_ssi
## X-squared = 1.1048, df = 1, p-value = 0.2932
##
## Variable Chi_Square Degrees_of_Freedom P_Value
## X-squared age 4.281540e+00 3 0.232623905
## X-squared1 sex 3.437466e+00 1 0.063733363
## X-squared2 smokhx 2.503042e+00 2 0.286069349
## X-squared3 dm_hx 1.790356e+00 1 0.180882801
## X-squared4 immuno_hx 1.742664e-03 1 0.966701798
## X-squared5 ppx_abx 3.619715e-28 1 1.000000000
## X-squared6 site 3.469900e+01 11 0.000277771
## X-squared7 dx 1.518019e+01 7 0.033757704
## X-squared8 surgery 1.104798e+00 1 0.293215451
## Processing: surgeon
##
## Pearson's Chi-squared test
##
## data: temp_col and temp_ssi
## X-squared = 11.093, df = 8, p-value = 0.1965
##
## Variable Chi_Square Degrees_of_Freedom P_Value
## X-squared age 4.281540e+00 3 0.232623905
## X-squared1 sex 3.437466e+00 1 0.063733363
## X-squared2 smokhx 2.503042e+00 2 0.286069349
## X-squared3 dm_hx 1.790356e+00 1 0.180882801
## X-squared4 immuno_hx 1.742664e-03 1 0.966701798
## X-squared5 ppx_abx 3.619715e-28 1 1.000000000
## X-squared6 site 3.469900e+01 11 0.000277771
## X-squared7 dx 1.518019e+01 7 0.033757704
## X-squared8 surgery 1.104798e+00 1 0.293215451
## X-squared9 surgeon 1.109344e+01 8 0.196460646
## Processing: location
##
## Pearson's Chi-squared test
##
## data: temp_col and temp_ssi
## X-squared = 2.0576, df = 2, p-value = 0.3574
##
## Variable Chi_Square Degrees_of_Freedom P_Value
## X-squared age 4.281540e+00 3 0.232623905
## X-squared1 sex 3.437466e+00 1 0.063733363
## X-squared2 smokhx 2.503042e+00 2 0.286069349
## X-squared3 dm_hx 1.790356e+00 1 0.180882801
## X-squared4 immuno_hx 1.742664e-03 1 0.966701798
## X-squared5 ppx_abx 3.619715e-28 1 1.000000000
## X-squared6 site 3.469900e+01 11 0.000277771
## X-squared7 dx 1.518019e+01 7 0.033757704
## X-squared8 surgery 1.104798e+00 1 0.293215451
## X-squared9 surgeon 1.109344e+01 8 0.196460646
## X-squared10 location 2.057629e+00 2 0.357430496
## Processing: defect_size
##
## Pearson's Chi-squared test
##
## data: temp_col and temp_ssi
## X-squared = 14.799, df = 4, p-value = 0.005137
##
## Variable Chi_Square Degrees_of_Freedom P_Value
## X-squared age 4.281540e+00 3 0.232623905
## X-squared1 sex 3.437466e+00 1 0.063733363
## X-squared2 smokhx 2.503042e+00 2 0.286069349
## X-squared3 dm_hx 1.790356e+00 1 0.180882801
## X-squared4 immuno_hx 1.742664e-03 1 0.966701798
## X-squared5 ppx_abx 3.619715e-28 1 1.000000000
## X-squared6 site 3.469900e+01 11 0.000277771
## X-squared7 dx 1.518019e+01 7 0.033757704
## X-squared8 surgery 1.104798e+00 1 0.293215451
## X-squared9 surgeon 1.109344e+01 8 0.196460646
## X-squared10 location 2.057629e+00 2 0.357430496
## X-squared11 defect_size 1.479889e+01 4 0.005137038
## Processing: closure
##
## Pearson's Chi-squared test
##
## data: temp_col and temp_ssi
## X-squared = 11.468, df = 3, p-value = 0.009447
##
## Variable Chi_Square Degrees_of_Freedom P_Value
## X-squared age 4.281540e+00 3 0.232623905
## X-squared1 sex 3.437466e+00 1 0.063733363
## X-squared2 smokhx 2.503042e+00 2 0.286069349
## X-squared3 dm_hx 1.790356e+00 1 0.180882801
## X-squared4 immuno_hx 1.742664e-03 1 0.966701798
## X-squared5 ppx_abx 3.619715e-28 1 1.000000000
## X-squared6 site 3.469900e+01 11 0.000277771
## X-squared7 dx 1.518019e+01 7 0.033757704
## X-squared8 surgery 1.104798e+00 1 0.293215451
## X-squared9 surgeon 1.109344e+01 8 0.196460646
## X-squared10 location 2.057629e+00 2 0.357430496
## X-squared11 defect_size 1.479889e+01 4 0.005137038
## X-squared12 closure 1.146783e+01 3 0.009447324
## Processing: ssi
multicol <- model.matrix(~ site + dx + ppx_abx + dm_hx + smokhx + immuno_hx + defect_size + closure - 1, data = SSI_Clean)
# Calculate the correlation matrix
cor_matrix <- cor(multicol)
# Convert the correlation matrix into a long format
long_cor_matrix <- melt(cor_matrix)
corrplot(cor_matrix, method = "circle", tl.cex = 0.6)
SSI_Clean$defect_size <- factor(SSI_Clean$defect_size,
levels = c("<1.00_cm", "1.00-1.49_cm", "1.50-1.99_cm", "2.00-2.49_cm", "≥2.50_cm"))
# Reorder closure
SSI_Clean$closure <- factor(SSI_Clean$closure,
levels = c("Primary", "Secondary", "Flap", "Graft"))
# Setting the reference for dx
SSI_Clean$dx <- factor(SSI_Clean$dx,
levels = c("BCC", "SCC", "Cysts", "Lipomas", "Nevi", "Other_Non-Malignancies", "Melanoma", "Other_Malignancies"))
# Setting the reference for site
SSI_Clean$site <- factor(SSI_Clean$site,
levels = c("face", "groin", "ue", "ear", "trunk", "hand", "nose", "scalp", "neck", "lip", "le", "foot"))
SSI_Clean$smokhx <- factor(SSI_Clean$smokhx,
levels = c("never", "former", "current"))
SSI_Clean$age <- factor(SSI_Clean$age,
levels = c("<60", "60-69", "70-79", "80+"))
SSI_Clean$immuno_hx <- factor(SSI_Clean$immuno_hx,
levels = c("no", "yes"))
SSI_Clean$dm_hx <- factor(SSI_Clean$dm_hx,
levels = c("no", "yes"))
SSI_Clean$ppx_abx <- factor(SSI_Clean$ppx_abx,
levels = c("no", "yes"))
SSI_Clean$surgery <- factor(SSI_Clean$surgery,
levels = c("Mohs", "Excision"))
#Create a new data set for the analysis where SSI is numerical
SSI_Regression <- SSI_Clean
SSI_Regression$ssi <- ifelse(SSI_Regression$ssi == "yes", 1, 0)
# Perform Multivariate Logistic Regression
glm_model <- glm(ssi ~
site +
age +
immuno_hx +
smokhx +
dm_hx +
ppx_abx +
dx +
defect_size +
closure,
data = SSI_Regression,
family = binomial)
summary(glm_model)
##
## Call:
## glm(formula = ssi ~ site + age + immuno_hx + smokhx + dm_hx +
## ppx_abx + dx + defect_size + closure, family = binomial,
## data = SSI_Regression)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.11474 0.35102 -8.873 < 2e-16 ***
## sitegroin -0.10920 1.07647 -0.101 0.91920
## siteue 0.22508 0.27424 0.821 0.41180
## siteear 0.37894 0.30487 1.243 0.21389
## sitetrunk 0.08526 0.27953 0.305 0.76035
## sitehand -0.19919 0.58092 -0.343 0.73168
## sitenose 0.71477 0.32401 2.206 0.02739 *
## sitescalp -1.12152 0.49885 -2.248 0.02456 *
## siteneck -1.48692 0.73488 -2.023 0.04304 *
## sitelip 0.86789 0.52022 1.668 0.09526 .
## sitele 0.72942 0.31823 2.292 0.02190 *
## sitefoot 1.09162 0.70681 1.544 0.12248
## age60-69 -0.39404 0.22869 -1.723 0.08488 .
## age70-79 -0.35654 0.23653 -1.507 0.13171
## age80+ -0.14404 0.25352 -0.568 0.56993
## immuno_hxyes -0.09313 0.16289 -0.572 0.56749
## smokhxformer 0.18578 0.16250 1.143 0.25292
## smokhxcurrent -0.27345 0.38793 -0.705 0.48088
## dm_hxyes 0.19223 0.18327 1.049 0.29422
## ppx_abxyes -0.20220 0.27005 -0.749 0.45401
## dxSCC 0.14147 0.20253 0.699 0.48486
## dxCysts -0.38126 0.34437 -1.107 0.26823
## dxLipomas -0.20691 0.64339 -0.322 0.74776
## dxNevi -0.68041 0.55213 -1.232 0.21782
## dxOther_Non-Malignancies -0.27729 0.46257 -0.599 0.54887
## dxMelanoma 0.32553 0.29527 1.102 0.27025
## dxOther_Malignancies 0.01011 0.77699 0.013 0.98962
## defect_size1.00-1.49_cm 0.23218 0.33912 0.685 0.49357
## defect_size1.50-1.99_cm 0.80865 0.30477 2.653 0.00797 **
## defect_size2.00-2.49_cm 0.91721 0.32477 2.824 0.00474 **
## defect_size≥2.50_cm 0.90829 0.31332 2.899 0.00374 **
## closureSecondary 0.14348 0.22535 0.637 0.52430
## closureFlap -0.23222 0.29496 -0.787 0.43111
## closureGraft 0.28355 0.39512 0.718 0.47299
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1334 on 2434 degrees of freedom
## Residual deviance: 1260 on 2401 degrees of freedom
## AIC: 1328
##
## Number of Fisher Scoring iterations: 6
# Calculating Odds Ratios and Confidence Intervals
exp_coef <- exp(coef(glm_model))
conf_int <- exp(confint(glm_model))
result <- data.frame(OR = exp_coef, '2.5 %' = conf_int[, 1], '97.5 %' = conf_int[, 2])
#Create the table
library(broom)
glm_summary <- tidy(glm_model, conf.int = TRUE, conf.level = 0.95, exponentiate = TRUE)
colnames(glm_summary) <- c("Variable", "OR", "Std. Error", "Statistic", "p-value", "CI lower", "CI upper")
library(ggplot2)
p_values <- summary(glm_model)$coefficients[, 4]
# Modify the significant_mask to include "Size: 1.00 - 1.49 cm"
significant_mask <- p_values < 0.05 | names(p_values) == "defect_size1.00-1.49_cm"
significant_exp_coef <- exp_coef[significant_mask]
significant_conf_int <- conf_int[significant_mask, ]
# Create a data frame containing variable names, odds ratios, and confidence intervals
forest_data_significant <- data.frame(
Term = names(significant_exp_coef),
OR = as.numeric(significant_exp_coef),
Lower = as.numeric(significant_conf_int[, 1]),
Upper = as.numeric(significant_conf_int[, 2])
)
closure_terms <- c("closureSecondary", "closureFlap", "closureGraft")
closure_exp_coef <- exp_coef[closure_terms]
closure_conf_int <- conf_int[closure_terms, ]
# Create a data frame for the closure terms
closure_data <- data.frame(
Term = closure_terms,
OR = as.numeric(closure_exp_coef),
Lower = as.numeric(closure_conf_int[, 1]),
Upper = as.numeric(closure_conf_int[, 2])
)
# Bind the closure data to the original dataframe
forest_data_significant <- rbind(forest_data_significant, closure_data)
# Rename the variables
forest_data_significant <- forest_data_significant %>%
mutate(
Term = dplyr::recode(Term,
"sitescalp" = "Site: Scalp",
"siteneck" = "Site: Neck",
"sitenose" = "Site: Nose",
"sitele" = "Site: Lower Extremity",
"defect_size1.00-1.49_cm" = "Size: 1.00 - 1.49 cm",
"defect_size2.00-2.49_cm" = "Size: 2.00 - 2.49 cm",
"defect_size1.50-1.99_cm" = "Size: 1.50 - 1.99 cm",
"defect_size≥2.50_cm" = "Size: ≥2.50 cm",
"closureSecondary" = "Closure: Secondary",
"closureFlap" = "Closure: Flap",
"closureGraft" = "Closure: Graft")) %>%
filter(!Term == "(Intercept)")
# Add in the other variables for comparison
new_terms <- data.frame(
Term = c("Site: Face", "Size: < 1.00 cm", "Closure: Primary"),
OR = NA,
Lower = NA,
Upper = NA,
stringsAsFactors = FALSE)
forest_data_significant <- rbind(forest_data_significant, new_terms)
# Order the variables
term_order <- rev(c(
"Size: < 1.00 cm",
"Size: 1.00 - 1.49 cm",
"Size: 1.50 - 1.99 cm",
"Size: 2.00 - 2.49 cm",
"Size: ≥2.50 cm",
"Site: Face",
"Site: Scalp",
"Site: Neck",
"Site: Nose",
"Site: Lower Extremity",
"Closure: Primary",
"Closure: Secondary",
"Closure: Flap",
"Closure: Graft"))
# Convert Term to a factor with the specified order
forest_data_significant$Term <- factor(forest_data_significant$Term, levels = term_order)
# Arrange the rows based on the factor levels
forest_data_significant <- forest_data_significant %>%
arrange(Term)
# Create a new column for grouping
forest_data_significant$Group <- ifelse(grepl("^Site:", forest_data_significant$Term), "Site",
ifelse(grepl("^Size:", forest_data_significant$Term), "Defect Size", "Closure"))
# Recode the variables for the graph
forest_data_significant <- forest_data_significant %>%
mutate(
Term = dplyr::recode(Term,
"Closure: Primary" = "Primary",
"Closure: Secondary" = "Secondary",
"Closure: Flap" = "Flap",
"Closure: Graft" = "Graft",
"Site: Face" = "Face",
"Site: Scalp" = "Scalp",
"Site: Nose" = "Nose",
"Site: Neck" = "Neck",
"Site: Lower Extremity" = "Lower Extemity",
"Size: < 1.00 cm" = "< 1.00 cm",
"Size: 1.00 - 1.49 cm" = "1.00 - 1.49 cm*",
"Size: 1.50 - 1.99 cm" = "1.50 - 1.99 cm",
"Size: 2.00 - 2.49 cm" = "2.00 - 2.50 cm",
"Size: ≥2.50 cm" = "≥ 2.50 cm"))
# Forest Plot with Facet
ggplot(forest_data_significant, aes(x = Term, y = OR, ymin = Lower, ymax = Upper)) +
geom_pointrange(aes(color = ifelse(Lower <= 1 & Upper >= 1, "grey",
ifelse(OR > 1, "red", "blue")),
fill = ifelse(Lower <= 1 & Upper >= 1, "grey",
ifelse(OR > 1, "red", "blue")))) +
geom_hline(yintercept = 1, linetype = "dashed") +
coord_flip() +
xlab("") +
ylab("Odds Ratio (95% CI)") +
scale_fill_manual(values = c("red" = "red", "blue" = "blue", "grey" = "grey"), guide = "none") +
scale_color_manual(values = c("red" = "red", "blue" = "blue", "grey" = "grey"), guide = "none") +
geom_text(data = subset(forest_data_significant, Term %in% c("Face", "< 1.00 cm", "Primary")),
aes(label = "[Reference]", y = 0.4, x = Term), size = 3, color = "grey40") +
theme_minimal() +
facet_grid(Group ~ ., scales = "free_y", space = "free_y", switch = "y") +
theme(
strip.background = element_rect(fill = "grey90", colour = "grey90", size = 1),
strip.text = element_text(size = 14, colour = "black"),
strip.placement = "outside",
axis.title.y = element_text(size = 12),
axis.text.y = element_text(size = 10),
axis.title.x = element_text(size = 12, vjust = -1),
axis.text.x = element_text(size = 10),
axis.ticks = element_line(colour = "gray10"),
axis.text = element_text(size = 12, colour = "gray10")
) +
labs(x = NULL)
# ANOVA
anova(glm_model, test = "Chisq")
# Model Fit
library(pscl)
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
pR2(glm_model)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML
## -629.99411375 -667.01538638 74.04254526 0.05550288 0.02994996
## r2CU
## 0.07100305
# Multicollinearity Assessment
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
vif_result <- vif(glm_model)
print(vif_result)
## GVIF Df GVIF^(1/(2*Df))
## site 4.534579 11 1.071131
## age 1.337120 3 1.049611
## immuno_hx 1.124382 1 1.060369
## smokhx 1.100644 2 1.024264
## dm_hx 1.063055 1 1.031045
## ppx_abx 1.058033 1 1.028607
## dx 2.278550 7 1.060589
## defect_size 1.445961 4 1.047176
## closure 2.649060 3 1.176292
detach(package:car, unload = TRUE)
library(ggplot2)
library(ggforce)
# Calculate predicted probabilities and confidence intervals for each defect_size and set reference values
new_data <- expand.grid(
age = levels(SSI_Regression$age)[1],
immuno_hx = levels(SSI_Regression$immuno_hx)[1],
dm_hx = levels(SSI_Regression$dm_hx)[1],
smokhx = levels(SSI_Regression$smokhx)[1],
ppx_abx = levels(SSI_Regression$ppx_abx)[1],
site = levels(SSI_Regression$site)[1],
dx = levels(SSI_Regression$dx)[1],
surgery = levels(SSI_Regression$surgery)[1],
defect_size = levels(SSI_Regression$defect_size),
closure = levels(SSI_Regression$closure)[1])
predicted_values <- predict(glm_model, newdata = new_data, type = "response", se.fit = TRUE)
# Compute Wald 95% CIs
z_value <- qnorm(0.975)
new_data$prob <- predicted_values$fit
new_data$CI_low <- predicted_values$fit - z_value * predicted_values$se.fit
new_data$CI_high <- predicted_values$fit + z_value * predicted_values$se.fit
new_data$defect_size <- factor(new_data$defect_size,
levels = c("<1.00_cm", "1.00-1.49_cm", "1.50-1.99_cm", "2.00-2.49_cm", "≥2.50_cm"))
new_data <- new_data %>%
mutate(
prob = prob * 100,
CI_low = CI_low * 100,
CI_high = CI_high * 100,
defect_size = fct_recode(defect_size,
"<1.00_cm" = "< 1.00",
"1.00-1.49_cm" = "1.00 - 1.49",
"1.50-1.99_cm" = "1.50 - 1.99",
"2.00-2.49_cm" = "2.00 - 2.49",
"≥2.50_cm" = "≥ 2.50"))
plot_data <- new_data %>%
filter(site == "face", dx == "BCC", closure == "Primary")
# Creat infection rates to compare aganist predicted
infection_rate <- SSI_Clean |>
group_by(defect_size) |>
dplyr::summarise(
total = n(),
yes_count = sum(ssi == "yes", na.rm = TRUE),
infection_percentage = (yes_count / total) * 100)
plot_data$infection_rate <- infection_rate$infection_percentage
# Plot
library(ggplot2)
p <- ggplot(plot_data, aes(x = defect_size, y = prob)) +
geom_line(group = 1, color = "#2C3E50", size = 1) +
geom_pointrange(aes(ymin = CI_low, ymax = CI_high), color = "#3498DB", size = 0.5, fatten = 2) +
xlab("Defect Size") +
ylab("Predicted Probability of Surgical Site Infection") +
ggtitle("Predicted Probability of Infection by Defect Size") +
theme_light(base_size = 14, base_family = "Helvetica") +
theme(plot.title = element_text(face = "bold", size = 20),
axis.title = element_text(size = 14))
print(p)
packages <- c("dplyr", "ggplot2", "tidyr", "tidyverse", "purrr", "broom", "lubridate", "forcats", "stringr", "tibble", "readr", "pscl", "car", "corrplot", "reshape2")
package_versions <- sapply(packages, packageVersion)
package_versions
## $dplyr
## [1] 1 1 3
##
## $ggplot2
## [1] 3 4 4
##
## $tidyr
## [1] 1 3 0
##
## $tidyverse
## [1] 2 0 0
##
## $purrr
## [1] 1 0 2
##
## $broom
## [1] 1 0 5
##
## $lubridate
## [1] 1 9 3
##
## $forcats
## [1] 1 0 0
##
## $stringr
## [1] 1 5 0
##
## $tibble
## [1] 3 2 1
##
## $readr
## [1] 2 1 4
##
## $pscl
## [1] 1 5 5 1
##
## $car
## [1] 3 1 2
##
## $corrplot
## [1] 0 92
##
## $reshape2
## [1] 1 4 4