setwd("D:/ACADEMIA/Research/burns ortho")
rm(list = ls())
library(readr)
library(tidyverse)
data <- read_csv("data.csv")
# Assuming your data is loaded into a dataframe called df
df <- read.csv("data.csv")

# Filter for "Yes" responses and sum counts for each anatomical location
anatomic_locations <- c("AnatomiclocationaffectedHead",
                        "AnatomiclocationaffectedPerin",
                        "AnatomiclocationaffectedUpper",
                        "AnatomiclocationaffectedLower",
                        "AnatomiclocationaffectedAnter",
                        "AnatomiclocationaffectedPoste")

# Convert data to numeric and replace NA with 0s
df[anatomic_locations] <- sapply(df[anatomic_locations], as.numeric)
df[is.na(df)] <- 0

# Summing the counts of "Yes" responses
sums <- colSums(df[anatomic_locations] == 1)

# Calculating proportions
total_responses <- 332
proportions <- (sums / total_responses) * 100
rounded_proportions <- round(proportions, 2)

# Rename anatomical location names for better readability
names(rounded_proportions) <- c("Head", "Perinium", "Upper limb", "Lower limb", "Anterior trunk", "Posterior trunk")

# Creating a data frame for plotting
plot_data <- data.frame(Location = names(rounded_proportions), Percentage = rounded_proportions)

# Generating the bar plot
ggplot(plot_data, aes(x=Location, y=Percentage, fill=Location)) +
  geom_bar(stat="identity", color="black") +
  geom_text(aes(label=paste0(Percentage, "%")), vjust=-0.3, size=3.5) + ylim(0, 100) +
  scale_fill_manual(values=c("red", "green", "blue", "orange", "purple", "brown")) +
  theme_minimal() +
  labs(title="Proportion of burnt area Anatomic Location",
       x="",
       y="Percentage (%)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.7)) 

# Assuming your data is loaded into a dataframe called df
df <- read.csv("data.csv")

# Filter for "Yes" responses and sum counts for each pre-existing medical condition
pre_existing_conditions <- c("PreexistingmedicalconditionM",  # Malnutrition
                             "PreexistingmedicalconditionE",  # Epilepsy
                             "PreexistingmedicalconditionO")  # Other

# Convert data to numeric and replace NA with 0s
df[pre_existing_conditions] <- sapply(df[pre_existing_conditions], as.numeric)
df[is.na(df)] <- 0

# Summing the counts of "Yes" responses
sums <- colSums(df[pre_existing_conditions] == 1)

# Calculating proportions
total_responses <- 332
proportions <- (sums / total_responses) * 100
rounded_proportions <- round(proportions, 2)

# Rename pre-existing medical condition names for better readability
names(rounded_proportions) <- c("Malnutrition", "Epilepsy", "Other")

# Creating a data frame for plotting
plot_data <- data.frame(Condition = names(rounded_proportions), Percentage = rounded_proportions)

# Generating the bar plot
ggplot(plot_data, aes(x=Condition, y=Percentage, fill=Condition)) +
  geom_bar(stat="identity", color="black") +
  geom_text(aes(label=paste0(Percentage, "%")), vjust=-0.3, size=3.5) + ylim(0, 10) +
  scale_fill_manual(values=c("blue", "green", "red")) +
  theme_minimal() +
  labs(title="Proportion of participants with Pre-existing Medical Condition ",
       x="",
       y="Percentage (%)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.7)) 

# Assuming your data is loaded into a dataframe called df
df <- read.csv("data.csv")

# Create new variables based on presence of keywords in the 'Otherhospitalintervention' column
keywords <- c("Oxygen therapy", "Nutritional rehabilitation", "IV dexamethasone",
              "Blood transfusion", "Antacids", "Multivitamins")

# Use grepl to detect the presence of each keyword and create a new binary variable for each
for (keyword in keywords) {
  df[[keyword]] <- as.numeric(grepl(keyword, df$Otherhospitalintervention, ignore.case = TRUE))
}

# Summing the counts of "Yes" responses for each new keyword variable
sums <- colSums(df[, keywords])

# Calculating proportions
total_responses <- nrow(df)  # Using the total number of rows in the dataset as the denominator
proportions <- (sums / total_responses) * 100
rounded_proportions <- round(proportions, 2)

# Creating a data frame for plotting
plot_data <- data.frame(Intervention = names(rounded_proportions), Percentage = rounded_proportions)

# Generating the bar plot
ggplot(plot_data, aes(x=Intervention, y=Percentage, fill=Intervention)) +
  geom_bar(stat="identity", color="black") +
  geom_text(aes(label=paste0(Percentage, "%")), vjust=-0.3, size=3.5) +
  scale_fill_brewer(palette="Set2") +  # Using a color palette that provides good visual distinction
  theme_minimal() +
  labs(title="Proportion of Interventions Recorded in 'Other Hospital Intervention' (%)",
       x="Interventions",
       y="Percentage (%)") +
  theme(axis.text.x = element_text(angle=45, hjust=1))

# Assuming your data is loaded into a dataframe called df
df <- read.csv("data.csv")

# Create new variables based on presence of keywords in the 'Otherhospitalintervention' column
keywords <- c("Oxygen therapy", "Nutritional rehabilitation", "IV dexamethasone",
              "Blood transfusion", "Antacids", "Multivitamins")

# Additional intervention columns already in the dataset
additional_interventions <- c("HospitalinterventionSurgery",
                              "HospitalinterventionTetanust",
                              "HospitalinterventionAntibioti")

# Use grepl to detect the presence of each keyword and create a new binary variable for each
for (keyword in keywords) {
  df[[keyword]] <- as.numeric(grepl(keyword, df$Otherhospitalintervention, ignore.case = TRUE))
}

# Summing the counts of "Yes" responses for each new keyword variable and additional interventions
sums <- colSums(df[, c(keywords, additional_interventions), drop = FALSE])

# Calculating proportions
total_responses <- nrow(df)  # Using the total number of rows in the dataset as the denominator
proportions <- (sums / total_responses) * 100
rounded_proportions <- round(proportions, 2)

# Renaming indices in the summed results for better readability
names(rounded_proportions)[names(rounded_proportions) %in% additional_interventions] <- c("Surgery", "Tetanus toxoid", "Antibiotics")

# Creating a data frame for plotting
plot_data <- data.frame(Intervention = names(rounded_proportions), Percentage = rounded_proportions)

# Generating the bar plot
ggplot(plot_data, aes(x=Intervention, y=Percentage, fill=Intervention)) +
  geom_bar(stat="identity", color="black") +
  geom_text(aes(label=paste0(Percentage, "%")), vjust=-0.3, size=3.5) + ylim(0, 100) +
  scale_fill_brewer(palette="Set3") +  # Using a color palette that provides good visual distinction
  theme_minimal() +
  labs(title="Proportion of participants and Hospital Interventions received",
       x="",
       y="Percentage (%)") +
  theme(axis.text.x = element_text(angle=45, hjust=0.9))

# Assuming your data is loaded into a dataframe called df
df <- read.csv("data.csv")

# Define the outcome variables and their new names
outcome_variables <- c("OutcomeofchildContractures", "OutcomeofchildDisfigurement",
                       "OutcomeofchildScarsofgraft", "OutcomeofchildAmputation",
                       "OutcomeofchildDeath", "OutcomeofchildRecoverywithou",
                       "OutcomeofchildOther")

new_names <- c("Contractures", "Disfigurement", "Scar of graft", "Amputation",
               "Died", "No complication-Recovered", "Other")

# Create new variables by renaming the columns
df[new_names] <- df[, outcome_variables]

# Summing the counts of "Yes" responses for each outcome
sums <- colSums(df[new_names] == 1, na.rm = TRUE)  # Using na.rm = TRUE to handle missing values

# Calculating proportions
total_responses <- nrow(df)  # Using the total number of rows in the dataset as the denominator
proportions <- (sums / total_responses) * 100
rounded_proportions <- round(proportions, 2)

# Creating a data frame for plotting
plot_data <- data.frame(Outcome = names(rounded_proportions), Percentage = rounded_proportions)

# Generating the bar plot
ggplot(plot_data, aes(x=Outcome, y=Percentage, fill=Outcome)) +
  geom_bar(stat="identity", color="black") +
  geom_text(aes(label=paste0(Percentage, "%")), vjust=-0.3, size=3.5) + ylim(0, 60) +
  scale_fill_brewer(palette="Set2") +  # Using a color palette that provides good visual distinction
  theme_minimal() +
  labs(title="Proportion of Outcomes for Children with Burns",
       x="",
       y="Percentage (%)") +
  theme(axis.text.x = element_text(angle=45, hjust=0.9))

# Load necessary libraries
library(survival)
library(survminer)
## Loading required package: ggpubr
## 
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
## 
##     myeloma
# Read the dataset
df <- read.csv("data.csv")

# 'Death' is your event indicator and 'Time' is the time to event
# Ensure 'Death' is coded as 0 = censored, 1 = event occurred
surv_obj <- Surv(time = df$Time, event = df$Death)

# Fit the survival curve using Kaplan-Meier estimate
fit <- survfit(surv_obj ~ 1)

# Plotting the Kaplan-Meier survival curve
ggsurvplot(fit, 
           data = df,
           risk.table = F,           # Add a risk table
           pval = TRUE,                 # Show p-value of the log-rank test
           conf.int = F,             # Show the confidence interval
           xlim = c(0, max(df$Time)),   # Set limits for x-axis
           ylim = c(0, 1),               # Set limits for y-axis
           xlab = "Time to Death",      # Label for x-axis
           ylab = "Survival Probability",# Label for y-axis
           title = "Kaplan-Meier Survival Curve")  # Title of the plot
## Warning in .pvalue(fit, data = data, method = method, pval = pval, pval.coord = pval.coord, : There are no survival curves to be compared. 
##  This is a null model.

# Save the plot to a PDF file
#pdf("Kaplan_Meier_Plot.pdf")
#ggsurvplot(fit, 
           #data = df,
           #risk.table = TRUE,
           #pval = TRUE,
           #conf.int = TRUE,
           #xlim = c(0, max(df$Time)),
           #xlab = "Time to Death",
           #ylab = "Survival Probability",
           #title = "Kaplan-Meier Survival Curve")
#dev.off()
# Read the dataset
df <- read.csv("data.csv")

#make Gender a factor
df$Gender <- as.factor(df$Gender)

# Create a survival object
surv_obj <- Surv(time = df$Time, event = df$Death)

# Fit the survival curve using Kaplan-Meier estimate stratified by Gender
fit <- survfit(surv_obj ~ Gender, data = df)

# Plotting the Kaplan-Meier survival curve stratified by Gender
# and include the log-rank test for comparison
ggsurvplot(fit,
           data = df,
           pval = TRUE,                # Include the log-rank test p-value
           #p-value position upper right
           pval.coord = c(0, 0.4),
           pval.size = 5,
           
           conf.int = F,             # Show confidence intervals
           palette = c("#FF0000", "#0000FF"),  # Colors for Female and Male
           risk.table = F,           # Add risk table below the plot
           xlim = c(0, max(df$Time)),   # Set x-axis limits
           ylim = c(0.25, 1),               # Set y-axis limits
           xlab = "Time to Event",      # Label for x-axis
           ylab = "Survival Probability",# Label for y-axis
           legend.title = "Gender",     # Legend title
           title = "Kaplan-Meier Survival Curve by Gender")  # Title of the plot

library(ggrepel)

# Read the dataset
df <- read.csv("data.csv")

# Calculate the proportion of each category
circumstances_proportions <- table(df$Circumstancesofthebur) / nrow(df) * 100
circumstances_proportions <- round(circumstances_proportions, 2)  # Round to two decimal places

# Create a data frame from the table for plotting
circumstances_data <- data.frame(Category = names(circumstances_proportions),
                                 Proportion = as.numeric(circumstances_proportions),
                                 Label = paste0(names(circumstances_proportions), " (", as.numeric(circumstances_proportions), "%)"))

# Convert the data to a factor to maintain category order in the plot
circumstances_data$Category <- factor(circumstances_data$Category, levels = circumstances_data$Category)

# Create the pie chart
pie_chart <- ggplot(circumstances_data, aes(x = "", y = Proportion, fill = Category)) +
  geom_bar(width = 1, stat = "identity") +
  coord_polar(theta = "y") +
  theme_void() +
  theme(legend.title = element_blank()) +
  geom_label_repel(aes(label = Label), 
                   position = position_stack(vjust = 0.5),
                   box.padding = 1, 
                   point.padding = 3,
                   segment.color = 'grey50',
                   size = 3)

# Display the pie chart
print(pie_chart)

# Save the pie chart as a PNG file
ggsave("circumstances_pie_chart_with_arrows.png", plot = pie_chart, width = 10, height = 8, units = "in")
library(ggrepel)

# Read the dataset
df <- read.csv("data.csv")

# Assuming 'depthofburn' is the column of interest
# Calculate the proportion of each category
circumstances_proportions <- table(df$depthofburn) / nrow(df) * 100
circumstances_proportions <- round(circumstances_proportions, 2)  # Round to two decimal places

# Create a data frame from the table for plotting
circumstances_data <- data.frame(Category = names(circumstances_proportions),
                                 Proportion = as.numeric(circumstances_proportions),
                                 Label = paste0(names(circumstances_proportions), " (", as.numeric(circumstances_proportions), "%)"))

# Convert the data to a factor to maintain category order in the plot
circumstances_data$Category <- factor(circumstances_data$Category, levels = circumstances_data$Category)

# Create the pie chart
pie_chart <- ggplot(circumstances_data, aes(x = "", y = Proportion, fill = Category)) +
  geom_bar(width = 1, stat = "identity") +
  coord_polar(theta = "y") +
  theme_void() +
  theme(legend.title = element_blank()) +
  geom_label_repel(aes(label = Label), 
                   position = position_stack(vjust = 0.5),
                   box.padding = 1, 
                   point.padding = 3,
                   segment.color = 'grey50',
                   size = 3)

# Display the pie chart
print(pie_chart)

# Save the pie chart as a PNG file
ggsave("circumstances_pie_chart_with_arrows.png", plot = pie_chart, width = 10, height = 8, units = "in")
# Read the dataset
df <- read.csv("data.csv")


#make prehospitalintervention a factor
df$prehospitalintervention <- as.factor(df$prehospitalintervention)

# Create a survival object
surv_obj <- Surv(time = df$Time, event = df$Death)

# Fit the survival curve using Kaplan-Meier estimate stratified by prehospitalintervention
fit <- survfit(surv_obj ~ prehospitalintervention, data = df)

# Plotting the Kaplan-Meier survival curve stratified by prehospitalintervention
# and include the log-rank test for comparison
ggsurvplot(fit,
           data = df,
           pval = TRUE,                # Include the log-rank test p-value
           #p-value position upper right
           pval.coord = c(0, 0.4),
           pval.size = 5,
           
           conf.int = F,             # Show confidence intervals
           palette = c("#FF0000", "#0000FF"),  # Colors for Female and Male
           risk.table = F,           # Add risk table below the plot
           xlim = c(0, max(df$Time)),   # Set x-axis limits
           ylim = c(0.25, 1),               # Set y-axis limits
           xlab = "Time to Event",      # Label for x-axis
           ylab = "Survival Probability",# Label for y-axis
           legend.title = "Prehospital intervention",     # Legend title
           title = "Kaplan-Meier Survival Curve by prehospital intervention")  # Title of the plot

# Read the dataset
df <- read.csv("data.csv")


#make Residenc a factor
df$Residenc <- as.factor(df$Residenc)

# Create a survival object
surv_obj <- Surv(time = df$Time, event = df$Death)

# Fit the survival curve using Kaplan-Meier estimate stratified by Residenc
fit <- survfit(surv_obj ~ Residenc, data = df)

# Plotting the Kaplan-Meier survival curve stratified by Residenc
# and include the log-rank test for comparison
ggsurvplot(fit,
           data = df,
           pval = TRUE,                # Include the log-rank test p-value
           #p-value position upper right
           pval.coord = c(0, 0.4),
           pval.size = 5,
           
           conf.int = F,             # Show confidence intervals
           palette = c("#FF0000", "#0000FF"),  # Colors for Female and Male
           risk.table = F,           # Add risk table below the plot
           xlim = c(0, max(df$Time)),   # Set x-axis limits
           ylim = c(0.25, 1),               # Set y-axis limits
           xlab = "Time to Event",      # Label for x-axis
           ylab = "Survival Probability",# Label for y-axis
           legend.title = "Residence",     # Legend title
           title = "Kaplan-Meier Survival Curve by Residence")  # Title of the plot

#scatter plot of age and length of hospital stay by death
# Read the dataset
df <- read.csv("data.csv")

#name Death values (1=Died, 0=Alive)
df$Death <- ifelse(df$Death == 1, "Died", "Alive")

#make Death a factor
df$Death <- as.factor(df$Death)
#make depthofburn a factor
df$depthofburn <- as.factor(df$depthofburn)

ggplot(df, aes(y = Age, x = Lengthofhospitalstaydays, color = Death)) +
  geom_point( aes(shape = depthofburn)) +
  labs(title = "Scatter Plot of Age and Length of Hospital Stay",
       y = "Age",
       x = "Length of Hospital Stay",
       color = "Vital status",
       shape = "Depth of burns") +
  theme_minimal()

#scatter plot of age and TBSA stay by death
# Read the dataset
df <- read.csv("data.csv")

#name Death values (1=Died, 0=Alive)
df$Death <- ifelse(df$Death == 1, "Died", "Alive")

#make Death a factor
df$Death <- as.factor(df$Death)
#make depthofburn a factor
df$depthofburn <- as.factor(df$depthofburn)

ggplot(df, aes(y = Age, x = TBSA, color = Death)) +
  geom_point( aes(shape = depthofburn)) +
  labs(title = "Scatter Plot of Age and TBSA ",
       y = "Age",
       x = "TBSA (%)",
       color = "Vital status",
       shape = "Depth of burns") +
  theme_minimal()