Markdown script for creating and comparing suvivorship curves for passive B.b transfer

#library(ggthemes)
#library(ecotox)
#library(tidyverse)
#library(dplyr)
#library(ggplot2)
#library(wesanderson)
#library(ggsci)
#library(survival)
#library(survminer)
#library(readxl) 

Survivorship curves

Kaplan-Meier curve finds the probability of an event at a time interval
The y axis of these curves is probability of survival for an individual at some time from the start of the study, x- axis.
1) The probability of survival of an individual at a time is the proportion of surving indivduals at time (t) multiplied by the previous proportion of surviving individuals. S(t) =([@[Number at risk (Nt)]]-[@[deaths (dt)]])/[@[Number at risk (Nt)]] * S(t)(previous time)
2) Data is coming directly from an uploaded excel file with columns: station (cage #), treatment (treat or control), individual (fly number), gender (m or f), ttd (time to death), dead (0 for alive or censored; or 1 for dead.)
3) Each fly should be its own row in the excel file. Any live flies at the end of the study should be recorded as ttd = last day and dead = 0
##read in file 
kap <- read_excel("C:/Users/sarah/Desktop/Masters program/Masters project/Grant_year_2/new_cone_testing/newcone_tp_3_23_24/kap_3_24all_tp.xlsx")
str(kap)
## tibble [635 × 6] (S3: tbl_df/tbl/data.frame)
##  $ station   : chr [1:635] "thick plate" "thick plate" "thick plate" "thick plate" ...
##  $ gender    : chr [1:635] "m" "m" "m" "m" ...
##  $ treatment : chr [1:635] "c" "c" "c" "c" ...
##  $ individual: num [1:635] 1 2 3 4 5 6 7 8 9 10 ...
##  $ ttd       : num [1:635] 1 1 1 1 1 1 1 1 1 1 ...
##  $ dead      : num [1:635] 1 1 1 1 1 1 1 1 1 1 ...
##using survival and survminer packages for kaplan meier data_analysis
##1) Data fitted to survival analysis format
t.km.force <- Surv(time = kap$ttd, event = kap$dead)

##2) Survival probabilities found 
    ## Include what factors you want the data to be split into when finding the survival probablities with "~ factors" 
t.force.treat = survfit(t.km.force ~ treatment + gender, data = kap, type = 'kaplan-meier', conf.type='log', conf.int = 0.95) #Includes confidence interval 

summary(t.force.treat)
## Call: survfit(formula = t.km.force ~ treatment + gender, data = kap, 
##     type = "kaplan-meier", conf.type = "log", conf.int = 0.95)
## 
##                 treatment=c, gender=f 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     1    105       8    0.924  0.0259        0.874        0.976
##     2     97       2    0.905  0.0286        0.850        0.963
##     3     95       2    0.886  0.0310        0.827        0.949
##     4     93       2    0.867  0.0332        0.804        0.934
##     7     91       3    0.838  0.0359        0.771        0.912
##    17     88       1    0.829  0.0368        0.760        0.904
##    19     87       1    0.819  0.0376        0.749        0.896
##    27     86       6    0.762  0.0416        0.685        0.848
## 
##                 treatment=c, gender=m 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     1    114      11    0.904  0.0277        0.851        0.959
##     2    103      11    0.807  0.0370        0.738        0.883
##     3     92       6    0.754  0.0403        0.679        0.838
##     4     86       3    0.728  0.0417        0.651        0.815
##     5     83       3    0.702  0.0428        0.623        0.791
##     6     80       4    0.667  0.0442        0.586        0.759
##     7     76       4    0.632  0.0452        0.549        0.727
##    10     72       2    0.614  0.0456        0.531        0.710
##    11     70       1    0.605  0.0458        0.522        0.702
##    12     69       1    0.596  0.0459        0.513        0.694
##    13     68       1    0.588  0.0461        0.504        0.685
##    15     67       2    0.570  0.0464        0.486        0.669
##    16     65       3    0.544  0.0466        0.460        0.643
##    27     62       7    0.482  0.0468        0.399        0.583
## 
##                 treatment=t, gender=f 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     1    166      17    0.898  0.0235        0.853        0.945
##     2    149      12    0.825  0.0295        0.770        0.885
##     3    137       3    0.807  0.0306        0.749        0.870
##     4    134       5    0.777  0.0323        0.716        0.843
##     6    129       4    0.753  0.0335        0.690        0.822
##     7    125       6    0.717  0.0350        0.652        0.789
##     8    119       9    0.663  0.0367        0.594        0.739
##     9    110       4    0.639  0.0373        0.569        0.716
##    10    106      10    0.578  0.0383        0.508        0.659
##    11     96       5    0.548  0.0386        0.477        0.629
##    12     91       6    0.512  0.0388        0.441        0.594
##    13     85       6    0.476  0.0388        0.406        0.558
##    14     79       5    0.446  0.0386        0.376        0.528
##    15     74       3    0.428  0.0384        0.359        0.510
##    16     71       4    0.404  0.0381        0.335        0.486
##    18     67       3    0.386  0.0378        0.318        0.467
##    19     64       2    0.373  0.0375        0.307        0.455
##    20     62       1    0.367  0.0374        0.301        0.449
##    27     61       4    0.343  0.0369        0.278        0.424
## 
##                 treatment=t, gender=m 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     1    250      89    0.644  0.0303       0.5873       0.7062
##     2    161      25    0.544  0.0315       0.4856       0.6094
##     3    136      23    0.452  0.0315       0.3943       0.5181
##     4    113      22    0.364  0.0304       0.3090       0.4288
##     5     91       1    0.360  0.0304       0.3052       0.4247
##     6     90      15    0.300  0.0290       0.2482       0.3625
##     7     75      13    0.248  0.0273       0.1999       0.3077
##     8     62       7    0.220  0.0262       0.1742       0.2778
##     9     55       7    0.192  0.0249       0.1489       0.2476
##    10     48       7    0.164  0.0234       0.1240       0.2170
##    11     41       6    0.140  0.0219       0.1030       0.1904
##    12     35       6    0.116  0.0203       0.0824       0.1633
##    13     29       1    0.112  0.0199       0.0790       0.1588
##    14     28       2    0.104  0.0193       0.0723       0.1496
##    16     26       3    0.092  0.0183       0.0623       0.1358
##    17     23       2    0.084  0.0175       0.0558       0.1265
##    20     21       1    0.080  0.0172       0.0525       0.1218
##    27     20       8    0.048  0.0135       0.0276       0.0834
Making the survival curve
### Plotting the result with all of the males 

kapplot <- ggsurvplot(t.force.treat, #output where the survival probs were found 
                      kap, #Original data name 
                      conf.int = TRUE, #Include confidence interval 
                      pval = FALSE,  
                      #All the labels are built into this part of the plot 
                                title = "Thick plate", 
                                subtitle = "Survival")
kapplot #View the plot 

#Cannot change the scale with ggsurvplot (easily), so we can use ggplot2 to do this 
    #Plot_name$plot wil lcall the plot so ggplot2 can use it
kapplot <- kapplot$plot + scale_x_continuous(
    limits = c(0, 30),  # Change the x-axis limits (e.g., from 0 to 1000)
    breaks = seq(0, 30, by = 2)) + theme_clean()                                  
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
kapplot #View the plot 

The log-rank test is the most widely used method of comparing two or more survival curves.
The null hypothesis is that there is no difference in survival between the two groups.
male_survdiff <- survdiff(formula = t.km.force ~kap$treatment, subset = c(gender == "m" ), data = kap)

female_survdiff <- survdiff(formula = t.km.force ~kap$treatment, subset = c(gender == "f" ), data = kap) 

print(male_survdiff)
## Call:
## survdiff(formula = t.km.force ~ kap$treatment, data = kap, subset = c(gender == 
##     "m"))
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## kap$treatment=c 114       59      132      40.0      92.1
## kap$treatment=t 250      238      165      31.8      92.1
## 
##  Chisq= 92.1  on 1 degrees of freedom, p= <2e-16
print(female_survdiff)
## Call:
## survdiff(formula = t.km.force ~ kap$treatment, data = kap, subset = c(gender == 
##     "f"))
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## kap$treatment=c 105       25       61      21.2      41.5
## kap$treatment=t 166      109       73      17.8      41.5
## 
##  Chisq= 41.5  on 1 degrees of freedom, p= 1e-10
##Significnatly more males and females died in the treatment group as compared to the males and females in the control groups