setwd("D:/ACADEMIA/Research/burns ortho")
rm(list = ls())
library(readr)
library(tidyverse)
data <- read_csv("data.csv")
# Assuming your data is loaded into a dataframe called df
df <- read.csv("data.csv")
# Filter for "Yes" responses and sum counts for each anatomical location
anatomic_locations <- c("AnatomiclocationaffectedHead",
"AnatomiclocationaffectedPerin",
"AnatomiclocationaffectedUpper",
"AnatomiclocationaffectedLower",
"AnatomiclocationaffectedAnter",
"AnatomiclocationaffectedPoste")
# Convert data to numeric and replace NA with 0s
df[anatomic_locations] <- sapply(df[anatomic_locations], as.numeric)
df[is.na(df)] <- 0
# Summing the counts of "Yes" responses
sums <- colSums(df[anatomic_locations] == 1)
# Calculating proportions
total_responses <- 332
proportions <- (sums / total_responses) * 100
rounded_proportions <- round(proportions, 2)
# Rename anatomical location names for better readability
names(rounded_proportions) <- c("Head", "Perinium", "Upper limb", "Lower limb", "Anterior trunk", "Posterior trunk")
# Creating a data frame for plotting
plot_data <- data.frame(Location = names(rounded_proportions), Percentage = rounded_proportions)
# Generating the bar plot
ggplot(plot_data, aes(x=Location, y=Percentage, fill=Location)) +
geom_bar(stat="identity", color="black") +
geom_text(aes(label=paste0(Percentage, "%")), vjust=-0.3, size=3.5) + ylim(0, 100) +
scale_fill_manual(values=c("red", "green", "blue", "orange", "purple", "brown")) +
theme_minimal() +
labs(title="Proportion of burnt area Anatomic Location",
x="",
y="Percentage (%)") +
theme(axis.text.x = element_text(angle = 45, hjust = 0.7))

# Assuming your data is loaded into a dataframe called df
df <- read.csv("data.csv")
# Filter for "Yes" responses and sum counts for each pre-existing medical condition
pre_existing_conditions <- c("PreexistingmedicalconditionM", # Malnutrition
"PreexistingmedicalconditionE", # Epilepsy
"PreexistingmedicalconditionO") # Other
# Convert data to numeric and replace NA with 0s
df[pre_existing_conditions] <- sapply(df[pre_existing_conditions], as.numeric)
df[is.na(df)] <- 0
# Summing the counts of "Yes" responses
sums <- colSums(df[pre_existing_conditions] == 1)
# Calculating proportions
total_responses <- 332
proportions <- (sums / total_responses) * 100
rounded_proportions <- round(proportions, 2)
# Rename pre-existing medical condition names for better readability
names(rounded_proportions) <- c("Malnutrition", "Epilepsy", "Other")
# Creating a data frame for plotting
plot_data <- data.frame(Condition = names(rounded_proportions), Percentage = rounded_proportions)
# Generating the bar plot
ggplot(plot_data, aes(x=Condition, y=Percentage, fill=Condition)) +
geom_bar(stat="identity", color="black") +
geom_text(aes(label=paste0(Percentage, "%")), vjust=-0.3, size=3.5) + ylim(0, 10) +
scale_fill_manual(values=c("blue", "green", "red")) +
theme_minimal() +
labs(title="Proportion of participants with Pre-existing Medical Condition ",
x="",
y="Percentage (%)") +
theme(axis.text.x = element_text(angle = 45, hjust = 0.7))

# Assuming your data is loaded into a dataframe called df
df <- read.csv("data.csv")
# Create new variables based on presence of keywords in the 'Otherhospitalintervention' column
keywords <- c("Oxygen therapy", "Nutritional rehabilitation", "IV dexamethasone",
"Blood transfusion", "Antacids", "Multivitamins")
# Use grepl to detect the presence of each keyword and create a new binary variable for each
for (keyword in keywords) {
df[[keyword]] <- as.numeric(grepl(keyword, df$Otherhospitalintervention, ignore.case = TRUE))
}
# Summing the counts of "Yes" responses for each new keyword variable
sums <- colSums(df[, keywords])
# Calculating proportions
total_responses <- nrow(df) # Using the total number of rows in the dataset as the denominator
proportions <- (sums / total_responses) * 100
rounded_proportions <- round(proportions, 2)
# Creating a data frame for plotting
plot_data <- data.frame(Intervention = names(rounded_proportions), Percentage = rounded_proportions)
# Generating the bar plot
ggplot(plot_data, aes(x=Intervention, y=Percentage, fill=Intervention)) +
geom_bar(stat="identity", color="black") +
geom_text(aes(label=paste0(Percentage, "%")), vjust=-0.3, size=3.5) +
scale_fill_brewer(palette="Set2") + # Using a color palette that provides good visual distinction
theme_minimal() +
labs(title="Proportion of Interventions Recorded in 'Other Hospital Intervention' (%)",
x="Interventions",
y="Percentage (%)") +
theme(axis.text.x = element_text(angle=45, hjust=1))

# Assuming your data is loaded into a dataframe called df
df <- read.csv("data.csv")
# Create new variables based on presence of keywords in the 'Otherhospitalintervention' column
keywords <- c("Oxygen therapy", "Nutritional rehabilitation", "IV dexamethasone",
"Blood transfusion", "Antacids", "Multivitamins")
# Additional intervention columns already in the dataset
additional_interventions <- c("HospitalinterventionSurgery",
"HospitalinterventionTetanust",
"HospitalinterventionAntibioti")
# Use grepl to detect the presence of each keyword and create a new binary variable for each
for (keyword in keywords) {
df[[keyword]] <- as.numeric(grepl(keyword, df$Otherhospitalintervention, ignore.case = TRUE))
}
# Summing the counts of "Yes" responses for each new keyword variable and additional interventions
sums <- colSums(df[, c(keywords, additional_interventions), drop = FALSE])
# Calculating proportions
total_responses <- nrow(df) # Using the total number of rows in the dataset as the denominator
proportions <- (sums / total_responses) * 100
rounded_proportions <- round(proportions, 2)
# Renaming indices in the summed results for better readability
names(rounded_proportions)[names(rounded_proportions) %in% additional_interventions] <- c("Surgery", "Tetanus toxoid", "Antibiotics")
# Creating a data frame for plotting
plot_data <- data.frame(Intervention = names(rounded_proportions), Percentage = rounded_proportions)
# Generating the bar plot
ggplot(plot_data, aes(x=Intervention, y=Percentage, fill=Intervention)) +
geom_bar(stat="identity", color="black") +
geom_text(aes(label=paste0(Percentage, "%")), vjust=-0.3, size=3.5) + ylim(0, 100) +
scale_fill_brewer(palette="Set3") + # Using a color palette that provides good visual distinction
theme_minimal() +
labs(title="Proportion of participants and Hospital Interventions received",
x="",
y="Percentage (%)") +
theme(axis.text.x = element_text(angle=45, hjust=0.9))

# Assuming your data is loaded into a dataframe called df
df <- read.csv("data.csv")
# Define the outcome variables and their new names
outcome_variables <- c("OutcomeofchildContractures", "OutcomeofchildDisfigurement",
"OutcomeofchildScarsofgraft", "OutcomeofchildAmputation",
"OutcomeofchildDeath", "OutcomeofchildRecoverywithou",
"OutcomeofchildOther")
new_names <- c("Contractures", "Disfigurement", "Scar of graft", "Amputation",
"Died", "No complication-Recovered", "Other")
# Create new variables by renaming the columns
df[new_names] <- df[, outcome_variables]
# Summing the counts of "Yes" responses for each outcome
sums <- colSums(df[new_names] == 1, na.rm = TRUE) # Using na.rm = TRUE to handle missing values
# Calculating proportions
total_responses <- nrow(df) # Using the total number of rows in the dataset as the denominator
proportions <- (sums / total_responses) * 100
rounded_proportions <- round(proportions, 2)
# Creating a data frame for plotting
plot_data <- data.frame(Outcome = names(rounded_proportions), Percentage = rounded_proportions)
# Generating the bar plot
ggplot(plot_data, aes(x=Outcome, y=Percentage, fill=Outcome)) +
geom_bar(stat="identity", color="black") +
geom_text(aes(label=paste0(Percentage, "%")), vjust=-0.3, size=3.5) + ylim(0, 60) +
scale_fill_brewer(palette="Set2") + # Using a color palette that provides good visual distinction
theme_minimal() +
labs(title="Proportion of Outcomes for Children with Burns",
x="",
y="Percentage (%)") +
theme(axis.text.x = element_text(angle=45, hjust=0.9))

# Load necessary libraries
library(survival)
library(survminer)
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
# Read the dataset
df <- read.csv("data.csv")
# 'Death' is your event indicator and 'Time' is the time to event
# Ensure 'Death' is coded as 0 = censored, 1 = event occurred
surv_obj <- Surv(time = df$Time, event = df$Death)
# Fit the survival curve using Kaplan-Meier estimate
fit <- survfit(surv_obj ~ 1)
# Plotting the Kaplan-Meier survival curve
ggsurvplot(fit,
data = df,
risk.table = F, # Add a risk table
pval = TRUE, # Show p-value of the log-rank test
conf.int = F, # Show the confidence interval
xlim = c(0, max(df$Time)), # Set limits for x-axis
ylim = c(0, 1), # Set limits for y-axis
xlab = "Time to Death", # Label for x-axis
ylab = "Survival Probability",# Label for y-axis
title = "Kaplan-Meier Survival Curve") # Title of the plot
## Warning in .pvalue(fit, data = data, method = method, pval = pval, pval.coord = pval.coord, : There are no survival curves to be compared.
## This is a null model.

# Save the plot to a PDF file
#pdf("Kaplan_Meier_Plot.pdf")
#ggsurvplot(fit,
#data = df,
#risk.table = TRUE,
#pval = TRUE,
#conf.int = TRUE,
#xlim = c(0, max(df$Time)),
#xlab = "Time to Death",
#ylab = "Survival Probability",
#title = "Kaplan-Meier Survival Curve")
#dev.off()
# Read the dataset
df <- read.csv("data.csv")
#make Gender a factor
df$Gender <- as.factor(df$Gender)
# Create a survival object
surv_obj <- Surv(time = df$Time, event = df$Death)
# Fit the survival curve using Kaplan-Meier estimate stratified by Gender
fit <- survfit(surv_obj ~ Gender, data = df)
# Plotting the Kaplan-Meier survival curve stratified by Gender
# and include the log-rank test for comparison
ggsurvplot(fit,
data = df,
pval = TRUE, # Include the log-rank test p-value
#p-value position upper right
pval.coord = c(0, 0.4),
pval.size = 5,
conf.int = F, # Show confidence intervals
palette = c("#FF0000", "#0000FF"), # Colors for Female and Male
risk.table = F, # Add risk table below the plot
xlim = c(0, max(df$Time)), # Set x-axis limits
ylim = c(0.25, 1), # Set y-axis limits
xlab = "Time to Event", # Label for x-axis
ylab = "Survival Probability",# Label for y-axis
legend.title = "Gender", # Legend title
title = "Kaplan-Meier Survival Curve by Gender") # Title of the plot

library(ggrepel)
# Read the dataset
df <- read.csv("data.csv")
# Calculate the proportion of each category
circumstances_proportions <- table(df$Circumstancesofthebur) / nrow(df) * 100
circumstances_proportions <- round(circumstances_proportions, 2) # Round to two decimal places
# Create a data frame from the table for plotting
circumstances_data <- data.frame(Category = names(circumstances_proportions),
Proportion = as.numeric(circumstances_proportions),
Label = paste0(names(circumstances_proportions), " (", as.numeric(circumstances_proportions), "%)"))
# Convert the data to a factor to maintain category order in the plot
circumstances_data$Category <- factor(circumstances_data$Category, levels = circumstances_data$Category)
# Create the pie chart
pie_chart <- ggplot(circumstances_data, aes(x = "", y = Proportion, fill = Category)) +
geom_bar(width = 1, stat = "identity") +
coord_polar(theta = "y") +
theme_void() +
theme(legend.title = element_blank()) +
geom_label_repel(aes(label = Label),
position = position_stack(vjust = 0.5),
box.padding = 1,
point.padding = 3,
segment.color = 'grey50',
size = 3)
# Display the pie chart
print(pie_chart)

# Save the pie chart as a PNG file
ggsave("circumstances_pie_chart_with_arrows.png", plot = pie_chart, width = 10, height = 8, units = "in")
library(ggrepel)
# Read the dataset
df <- read.csv("data.csv")
# Assuming 'depthofburn' is the column of interest
# Calculate the proportion of each category
circumstances_proportions <- table(df$depthofburn) / nrow(df) * 100
circumstances_proportions <- round(circumstances_proportions, 2) # Round to two decimal places
# Create a data frame from the table for plotting
circumstances_data <- data.frame(Category = names(circumstances_proportions),
Proportion = as.numeric(circumstances_proportions),
Label = paste0(names(circumstances_proportions), " (", as.numeric(circumstances_proportions), "%)"))
# Convert the data to a factor to maintain category order in the plot
circumstances_data$Category <- factor(circumstances_data$Category, levels = circumstances_data$Category)
# Create the pie chart
pie_chart <- ggplot(circumstances_data, aes(x = "", y = Proportion, fill = Category)) +
geom_bar(width = 1, stat = "identity") +
coord_polar(theta = "y") +
theme_void() +
theme(legend.title = element_blank()) +
geom_label_repel(aes(label = Label),
position = position_stack(vjust = 0.5),
box.padding = 1,
point.padding = 3,
segment.color = 'grey50',
size = 3)
# Display the pie chart
print(pie_chart)

# Save the pie chart as a PNG file
ggsave("circumstances_pie_chart_with_arrows.png", plot = pie_chart, width = 10, height = 8, units = "in")
# Read the dataset
df <- read.csv("data.csv")
#make prehospitalintervention a factor
df$prehospitalintervention <- as.factor(df$prehospitalintervention)
# Create a survival object
surv_obj <- Surv(time = df$Time, event = df$Death)
# Fit the survival curve using Kaplan-Meier estimate stratified by prehospitalintervention
fit <- survfit(surv_obj ~ prehospitalintervention, data = df)
# Plotting the Kaplan-Meier survival curve stratified by prehospitalintervention
# and include the log-rank test for comparison
ggsurvplot(fit,
data = df,
pval = TRUE, # Include the log-rank test p-value
#p-value position upper right
pval.coord = c(0, 0.4),
pval.size = 5,
conf.int = F, # Show confidence intervals
palette = c("#FF0000", "#0000FF"), # Colors for Female and Male
risk.table = F, # Add risk table below the plot
xlim = c(0, max(df$Time)), # Set x-axis limits
ylim = c(0.25, 1), # Set y-axis limits
xlab = "Time to Event", # Label for x-axis
ylab = "Survival Probability",# Label for y-axis
legend.title = "Prehospital intervention", # Legend title
title = "Kaplan-Meier Survival Curve by prehospital intervention") # Title of the plot

# Read the dataset
df <- read.csv("data.csv")
#make Residenc a factor
df$Residenc <- as.factor(df$Residenc)
# Create a survival object
surv_obj <- Surv(time = df$Time, event = df$Death)
# Fit the survival curve using Kaplan-Meier estimate stratified by Residenc
fit <- survfit(surv_obj ~ Residenc, data = df)
# Plotting the Kaplan-Meier survival curve stratified by Residenc
# and include the log-rank test for comparison
ggsurvplot(fit,
data = df,
pval = TRUE, # Include the log-rank test p-value
#p-value position upper right
pval.coord = c(0, 0.4),
pval.size = 5,
conf.int = F, # Show confidence intervals
palette = c("#FF0000", "#0000FF"), # Colors for Female and Male
risk.table = F, # Add risk table below the plot
xlim = c(0, max(df$Time)), # Set x-axis limits
ylim = c(0.25, 1), # Set y-axis limits
xlab = "Time to Event", # Label for x-axis
ylab = "Survival Probability",# Label for y-axis
legend.title = "Residence", # Legend title
title = "Kaplan-Meier Survival Curve by Residence") # Title of the plot

#scatter plot of age and length of hospital stay by death
# Read the dataset
df <- read.csv("data.csv")
#name Death values (1=Died, 0=Alive)
df$Death <- ifelse(df$Death == 1, "Died", "Alive")
#make Death a factor
df$Death <- as.factor(df$Death)
#make depthofburn a factor
df$depthofburn <- as.factor(df$depthofburn)
ggplot(df, aes(y = Age, x = Lengthofhospitalstaydays, color = Death)) +
geom_point( aes(shape = depthofburn)) +
labs(title = "Scatter Plot of Age and Length of Hospital Stay",
y = "Age",
x = "Length of Hospital Stay",
color = "Vital status",
shape = "Depth of burns") +
theme_minimal()

#scatter plot of age and TBSA stay by death
# Read the dataset
df <- read.csv("data.csv")
#name Death values (1=Died, 0=Alive)
df$Death <- ifelse(df$Death == 1, "Died", "Alive")
#make Death a factor
df$Death <- as.factor(df$Death)
#make depthofburn a factor
df$depthofburn <- as.factor(df$depthofburn)
ggplot(df, aes(y = Age, x = TBSA, color = Death)) +
geom_point( aes(shape = depthofburn)) +
labs(title = "Scatter Plot of Age and TBSA ",
y = "Age",
x = "TBSA (%)",
color = "Vital status",
shape = "Depth of burns") +
theme_minimal()
