Load the necessary R libraries

If you haven’t installed these packages yet, please use the “install.packages” command to install them first

#install.packages("readxl")
#install.packages("dplyr")
#install.packages("ggplot2")
#install.packages("cluster")
#install.packages("table1")
#install.packages("ggpubr")
#install.packages("factoextra")
#install.packages("ggrepel")
#install.packages("ggfortify")
#install.packages("rstan")
#install.packages("boot")
#install.packages("kableExtra")
#install.packages("pscl")

library(readxl)
library(dplyr)
library(ggplot2)
library(cluster)
library(table1)
library(ggpubr)
library(factoextra)
library(ggrepel)
library(ggfortify)
library(rstan)
library(boot)
library(kableExtra)
library(pscl)

Import the Tcell data

# Please change the file directory to where you have saved the "Discharged_Tcell_dat.xlsx"
Tcell_discharged = read_excel("C:/Users/mindy/Dropbox/Novel Corona Virus/Paper draft/COVID_data/Discharged_Tcell_dat.xlsx")


# Please change the file directory to where you have saved the "Death_Tcell_dat.xlsx"
Tcell_death = read_excel("C:/Users/mindy/Dropbox/Novel Corona Virus/Paper draft/COVID_data/Death_Tcell_dat.xlsx")

Calculate the time intervals in days

Tcell_discharged = Tcell_discharged %>% 
  mutate(Dis2Hosp = as.numeric(difftime(DateofDischarge, DateofHosp,units = "days")),
         Hosp2Symp = as.numeric(difftime(DateofHosp, DateofSymp, units="days")),
         Dis2Symp = as.numeric(difftime(DateofDischarge, DateofSymp, units="days"))) %>%
  mutate(Tcell2Symp = as.numeric(difftime(DateofTest, DateofSymp, units = "days")),
         Tcell2Hosp = as.numeric(difftime(DateofTest, DateofHosp,units = "days")),
         Dis2Tcell = as.numeric(difftime(DateofDischarge, DateofTest, units = "days"))) %>%
  mutate(Disease=ifelse(UnderDisease=="yes",1,0))


Tcell_death = Tcell_death %>% 
  mutate(Death2Hosp = as.numeric(difftime(DateofDeath, DateofHosp,units = "days")),
         Death2Symp = as.numeric(difftime(DateofDeath, DateofSymp, units="days")),
         Hosp2Symp = as.numeric(difftime(DateofHosp, DateofSymp, units="days"))) %>%
  mutate(Tcell2Symp = as.numeric(difftime(DateofTest, DateofSymp,units = "days")), 
         Tcell2Hosp = as.numeric(difftime(DateofTest, DateofHosp,units = "days")),
         Death2Tcell = as.numeric(difftime(DateofDeath, DateofTest, units = "days")))

Select the first Tcell test result after hospitalization as the baseline

Tcell_death_initial = Tcell_death %>%
  group_by(PTID) %>% filter(DateofTest==min(DateofTest)) %>% mutate(Group="Death") %>%
  mutate(Sex=ifelse(Gender=="M", 1, 0), Disease=ifelse(UnderDisease=="yes",1,0)) %>%
  select(PTID, Group, Sex, Age, Disease, Death2Hosp, Death2Symp, Hosp2Symp,
         Tcell_item1, Tcell_item2, Tcell_item3, Tcell_item4, Tcell_item5, 
         Tcell_item6, Tcell_item7, Tcell_item8)

Tcell_discharged_initial = Tcell_discharged %>% 
  group_by(PTID) %>% filter(DateofTest==min(DateofTest)) %>% mutate(Group="Discharged") %>%
  mutate(Sex=ifelse(Gender=="M", 1, 0), Disease=ifelse(UnderDisease=="yes",1,0))%>%
  select(PTID, Group, Sex, Age, Disease, Dis2Hosp, Dis2Symp, Hosp2Symp,
         Tcell_item1, Tcell_item2, Tcell_item3, Tcell_item4, Tcell_item5, 
         Tcell_item6, Tcell_item7, Tcell_item8)

Produce Table1

Tcell_discharged_initial$Disease = factor(Tcell_discharged_initial$Disease, 
                                          levels=c("0","1"), labels=c("No", "Yes"))

label(Tcell_discharged_initial$Disease) = "Underlying Medical Condition"

Tcell_discharged_initial$Gender = factor(Tcell_discharged_initial$Sex, 
                                         levels=c("0","1"), labels=c("Female", "Male"))

label(Tcell_discharged_initial$Dis2Hosp) = "Duration of Hospitalization"
units(Tcell_discharged_initial$Dis2Hosp) = "days"

label(Tcell_discharged_initial$Dis2Symp) = "From the First Symptom to Hospital Discharge"
units(Tcell_discharged_initial$Dis2Symp) = "days"

label(Tcell_discharged_initial$Hosp2Symp) = "From the First Symptom to Hospitalization"
units(Tcell_discharged_initial$Hosp2Symp) = "days"

table1(data=Tcell_discharged_initial, ~Age + Disease + Dis2Hosp + Hosp2Symp + 
         Dis2Symp|Gender, topclass = "Rtable1-zebra")
Female
(n=156)
Male
(n=154)
Overall
(n=310)
Age
Mean (SD) 58.8 (13.2) 58.1 (14.7) 58.5 (13.9)
Median [Min, Max] 59.5 [21.0, 86.0] 60.5 [22.0, 90.0] 60.0 [21.0, 90.0]
Underlying Medical Condition
No 112 (71.8%) 90 (58.4%) 202 (65.2%)
Yes 44 (28.2%) 64 (41.6%) 108 (34.8%)
Duration of Hospitalization (days)
Mean (SD) 12.2 (6.41) 13.9 (8.38) 13.0 (7.48)
Median [Min, Max] 12.0 [2.00, 33.0] 12.0 [2.00, 41.0] 12.0 [2.00, 41.0]
From the First Symptom to Hospitalization (days)
Mean (SD) 14.3 (9.30) 14.4 (9.90) 14.4 (9.59)
Median [Min, Max] 11.0 [2.00, 43.0] 11.0 [1.00, 57.0] 11.0 [1.00, 57.0]
From the First Symptom to Hospital Discharge (days)
Mean (SD) 26.6 (8.63) 28.2 (11.1) 27.4 (9.94)
Median [Min, Max] 25.0 [11.0, 54.0] 26.0 [7.00, 71.0] 25.5 [7.00, 71.0]

Produce Table2

Tcell_death_initial$Disease = factor(Tcell_death_initial$Disease, levels=c("0","1"),
                                  labels=c("No", "Yes"))

label(Tcell_death_initial$Disease) = "Underlying Medical Condition"

Tcell_death_initial$Gender = factor(Tcell_death_initial$Sex, levels=c("0","1"),
                                  labels=c("Female", "Male"))

label(Tcell_death_initial$Death2Hosp) = "Duration of Hospitalization"
units(Tcell_death_initial$Death2Hosp) = "days"

label(Tcell_death_initial$Death2Symp) = "From the First Symptom to Death"
units(Tcell_death_initial$Death2Symp) = "days"

label(Tcell_death_initial$Hosp2Symp) = "From the First Symptom to Hospitalization"
units(Tcell_death_initial$Hosp2Symp) = "days"

table1(data=Tcell_death_initial, ~Age + Disease + Death2Hosp + Hosp2Symp + 
         Death2Symp|Gender, topclass = "Rtable1-zebra")
Female
(n=13)
Male
(n=17)
Overall
(n=30)
Age
Mean (SD) 68.9 (6.42) 69.1 (9.01) 69.0 (7.87)
Median [Min, Max] 69.0 [60.0, 85.0] 69.0 [51.0, 81.0] 69.0 [51.0, 85.0]
Underlying Medical Condition
No 4 (30.8%) 3 (17.6%) 7 (23.3%)
Yes 9 (69.2%) 14 (82.4%) 23 (76.7%)
Duration of Hospitalization (days)
Mean (SD) 15.2 (9.93) 15.0 (8.12) 15.1 (8.78)
Median [Min, Max] 12.0 [2.00, 32.0] 13.0 [5.00, 39.0] 12.5 [2.00, 39.0]
From the First Symptom to Hospitalization (days)
Mean (SD) 13.5 (11.2) 10.2 (4.72) 11.7 (8.20)
Median [Min, Max] 10.0 [3.00, 42.0] 9.00 [4.00, 21.0] 10.0 [3.00, 42.0]
From the First Symptom to Death (days)
Mean (SD) 28.7 (12.2) 25.2 (8.00) 26.7 (10.0)
Median [Min, Max] 27.0 [8.00, 53.0] 23.0 [16.0, 48.0] 25.5 [8.00, 53.0]

Produce Table3

Tcell_discharged_initial$Sex = factor(Tcell_discharged_initial$Sex, levels=c("0","1"),
                                  labels=c("Female", "Male"))

label(Tcell_discharged_initial$Tcell_item1) = "Total Lymph Cell Percentage"
units(Tcell_discharged_initial$Tcell_item1) = "%"

label(Tcell_discharged_initial$Tcell_item2) = "Total Lymph Cell Counts"
units(Tcell_discharged_initial$Tcell_item2) = "cells/ul"

label(Tcell_discharged_initial$Tcell_item3) = "Helper Lymph Cell Percentage"
units(Tcell_discharged_initial$Tcell_item3) = "%"

label(Tcell_discharged_initial$Tcell_item4) = "Helper Lymph Cell Counts"
units(Tcell_discharged_initial$Tcell_item4) = "cells/ul"

label(Tcell_discharged_initial$Tcell_item5) = "Suppressor Lymph Cell Percentage"
units(Tcell_discharged_initial$Tcell_item5) = "%"

label(Tcell_discharged_initial$Tcell_item6) = "Suppressor Lymph Cell Counts"
units(Tcell_discharged_initial$Tcell_item6) = "cells/ul"

label(Tcell_discharged_initial$Tcell_item7) = "Th/Ts(TH/TS)"

label(Tcell_discharged_initial$Tcell_item8) = "Lymph Cell Counts"
units(Tcell_discharged_initial$Tcell_item8) = "cells/ul"

table1(data=Tcell_discharged_initial, ~Tcell_item1 + Tcell_item2 + Tcell_item3 + 
         Tcell_item4 + Tcell_item5 + Tcell_item6 + Tcell_item7 +Tcell_item8|Sex, 
       topclass = "Rtable1-zebra")
Female
(n=156)
Male
(n=154)
Overall
(n=310)
Total Lymph Cell Percentage (%)
Mean (SD) 60.2 (11.2) 58.1 (12.0) 59.2 (11.6)
Median [Min, Max] 62.6 [27.7, 82.4] 58.6 [21.1, 82.7] 61.2 [21.1, 82.7]
Total Lymph Cell Counts (cells/ul)
Mean (SD) 728 (496) 700 (508) 714 (502)
Median [Min, Max] 623 [69.6, 2070] 656 [40.1, 2300] 637 [40.1, 2300]
Helper Lymph Cell Percentage (%)
Mean (SD) 34.7 (9.53) 33.8 (9.93) 34.3 (9.73)
Median [Min, Max] 35.6 [14.2, 58.7] 34.3 [6.84, 56.6] 34.5 [6.84, 58.7]
Helper Lymph Cell Counts (cells/ul)
Mean (SD) 422 (299) 418 (329) 420 (314)
Median [Min, Max] 344 [38.0, 1410] 340 [15.0, 1700] 340 [15.0, 1700]
Suppressor Lymph Cell Percentage (%)
Mean (SD) 25.5 (8.46) 24.3 (8.79) 24.9 (8.63)
Median [Min, Max] 24.9 [6.80, 49.1] 23.2 [6.75, 55.5] 24.1 [6.75, 55.5]
Suppressor Lymph Cell Counts (cells/ul)
Mean (SD) 306 (225) 282 (210) 294 (218)
Median [Min, Max] 251 [27.0, 1120] 244 [18.3, 1070] 247 [18.3, 1120]
Th/Ts(TH/TS)
Mean (SD) 1.57 (0.869) 1.60 (0.785) 1.58 (0.827)
Median [Min, Max] 1.47 [0.323, 5.64] 1.46 [0.123, 4.86] 1.47 [0.123, 5.64]
Lymph Cell Counts (cells/ul)
Mean (SD) 1150 (693) 1160 (792) 1160 (743)
Median [Min, Max] 1010 [237, 3210] 1010 [93.8, 4290] 1010 [93.8, 4290]

Produce Table4

Tcell_death_initial$Sex = factor(Tcell_death_initial$Sex, levels=c("0","1"),
                                  labels=c("Female", "Male"))

label(Tcell_death_initial$Tcell_item1) = "Total Lymph Cell Percentage"
units(Tcell_death_initial$Tcell_item1) = "%"

label(Tcell_death_initial$Tcell_item2) = "Total Lymph Cell Counts"
units(Tcell_death_initial$Tcell_item2) = "cells/ul"

label(Tcell_death_initial$Tcell_item3) = "Helper Lymph Cell Percentage"
units(Tcell_death_initial$Tcell_item3) = "%"

label(Tcell_death_initial$Tcell_item4) = "Helper Lymph Cell Counts"
units(Tcell_death_initial$Tcell_item4) = "cells/ul"

label(Tcell_death_initial$Tcell_item5) = "Suppressor Lymph Cell Percentage"
units(Tcell_death_initial$Tcell_item5) = "%"

label(Tcell_death_initial$Tcell_item6) = "Suppressor Lymph Cell Counts"
units(Tcell_death_initial$Tcell_item6) = "cells/ul"

label(Tcell_death_initial$Tcell_item7) = "Th/Ts(TH/TS)"

label(Tcell_death_initial$Tcell_item8) = "Lymph Cell counts"
units(Tcell_death_initial$Tcell_item8) = "cells/ul"

table1(data=Tcell_death_initial, ~Tcell_item1 + Tcell_item2 + Tcell_item3 + 
         Tcell_item4 + Tcell_item5 + Tcell_item6 + Tcell_item7 + Tcell_item8|Sex, 
       topclass = "Rtable1-zebra")
Female
(n=13)
Male
(n=17)
Overall
(n=30)
Total Lymph Cell Percentage (%)
Mean (SD) 50.9 (14.1) 52.1 (14.2) 51.6 (14.0)
Median [Min, Max] 47.9 [26.6, 75.4] 53.4 [18.5, 72.0] 51.7 [18.5, 75.4]
Total Lymph Cell Counts (cells/ul)
Mean (SD) 240 (129) 206 (184) 221 (161)
Median [Min, Max] 212 [78.0, 456] 110 [35.8, 719] 170 [35.8, 719]
Helper Lymph Cell Percentage (%)
Mean (SD) 34.4 (13.3) 34.7 (11.5) 34.6 (12.1)
Median [Min, Max] 38.0 [13.9, 51.6] 36.6 [13.9, 52.0] 37.3 [13.9, 52.0]
Helper Lymph Cell Counts (cells/ul)
Mean (SD) 165 (113) 123 (88.0) 141 (99.9)
Median [Min, Max] 129 [53.9, 385] 77.8 [24.5, 322] 117 [24.5, 385]
Suppressor Lymph Cell Percentage (%)
Mean (SD) 16.5 (6.58) 17.3 (10.0) 17.0 (8.56)
Median [Min, Max] 19.0 [7.42, 27.2] 15.2 [4.55, 47.6] 16.0 [4.55, 47.6]
Suppressor Lymph Cell Counts (cells/ul)
Mean (SD) 75.2 (47.9) 82.4 (122) 79.3 (95.7)
Median [Min, Max] 70.8 [21.8, 211] 44.6 [11.3, 512] 55.7 [11.3, 512]
Th/Ts(TH/TS)
Mean (SD) 2.50 (1.42) 2.51 (1.31) 2.51 (1.34)
Median [Min, Max] 2.26 [0.510, 5.45] 2.42 [0.403, 5.88] 2.34 [0.403, 5.88]
Lymph Cell counts (cells/ul)
Mean (SD) 474 (232) 385 (269) 424 (253)
Median [Min, Max] 381 [126, 951] 304 [67.0, 1080] 374 [67.0, 1080]

Combine Data and scale the selected subset of the data into standardized normal distribution Normal(0,1)

Initial_combined = rbind(subset(Tcell_discharged_initial,select = -c(Dis2Hosp,Dis2Symp)), 
                         subset(Tcell_death_initial, select = -c(Death2Hosp,Death2Symp)))


Tcell_3ind = Initial_combined[,c("Age", "Disease", "Tcell_item4","Tcell_item6","Tcell_item7","Tcell_item2","Group")]

colnames(Tcell_3ind) = c("Age", "Disease", "Helper_Lymph_Cell", 
                         "Suppressor_Lymph_Cell", "TH_TS_ratio","Total_Lymph_Cell", "Group")   

Tcell_3ind_scaled = Tcell_3ind
Tcell_3ind_scaled$Disease = ifelse(Tcell_3ind_scaled$Disease == "Yes",1,0)
Tcell_3ind_scaled = as.data.frame(scale(Tcell_3ind_scaled[,-c(6:7)]))

colnames(Tcell_3ind_scaled) = c("Age", "Disease", "Helper_Lymph_Cell", 
                                "Suppressor_Lymph_Cell", "TH_TS_ratio")   

Produce Figure2

Tcell_discharged_initial %>%
  select(Age, Disease, Dis2Hosp) %>%
  ggplot(aes(x=Age, y=Dis2Hosp, color=Disease)) + 
  geom_point()+ theme_minimal()+ theme(legend.title=element_blank())+ 
  scale_color_manual(values=c("lightpink", "royalblue"))+ 
  geom_smooth(method='lm', formula= y~x)+ylab("Duration of Hospitalization")

Tcell_death_initial %>%
  select(Age, Disease, Death2Hosp) %>%
  ggplot(aes(x=Age, y=Death2Hosp, color=Disease)) + 
  geom_point()+ theme_minimal()+ theme(legend.title=element_blank())+ 
  scale_color_manual(values=c("lightpink", "royalblue"))+ 
  geom_smooth(method='lm', formula= y~x)+ylab("Duration of Hospitalization")

Produce Figure3

ggboxplot(data=Initial_combined, y="Tcell_item2", x="Group", fill="Group") + 
  stat_compare_means(method = "t.test") + 
  geom_boxplot(alpha=0.4,color="lightgrey") + 
  scale_fill_manual(name="Group", labels=c("Death","Discharged"), 
                    breaks = c("Death","Discharged"),
                    values=c("lightcoral", "mediumseagreen")) + 
  theme_minimal() + xlab("") + ylab("") + theme(legend.position = "none")

ggboxplot(data=Initial_combined, y="Tcell_item4", x="Group", fill="Group") + 
  stat_compare_means(method = "t.test") + 
  geom_boxplot(alpha=0.4,color="lightgrey") + 
  scale_fill_manual(name="Group", labels=c("Death","Discharged"), 
                    breaks = c("Death","Discharged"),
                    values=c("lightcoral", "mediumseagreen")) + 
  theme_minimal()+xlab("")+ylab("") + theme(legend.position = "none")

ggboxplot(data=Initial_combined, y="Tcell_item6", x="Group", fill="Group") + 
  stat_compare_means(method = "t.test") + 
  geom_boxplot(alpha=0.4,color="lightgrey") + 
  scale_fill_manual(name="Group", labels=c("Death","Discharged"), 
                    breaks = c("Death","Discharged"),
                    values=c("lightcoral", "mediumseagreen")) + 
  theme_minimal()+xlab("")+ylab("") + theme(legend.position = "none")

ggboxplot(data=Initial_combined, y="Tcell_item7", x="Group", fill="Group") + 
  stat_compare_means(method = "t.test") + 
  geom_boxplot(alpha=0.4,color="lightgrey") + 
  scale_fill_manual(name="Group", labels=c("Death","Discharged"), 
                    breaks = c("Death","Discharged"),
                    values=c("lightcoral", "mediumseagreen")) + 
  theme_minimal()+xlab("")+ylab("") + theme(legend.position = "none")

Produce Figure4

Tcell_3ind %>%
  ggplot(aes(y=Helper_Lymph_Cell, x=Total_Lymph_Cell, color = factor(Group), label = Group)) + 
  geom_point()+ theme_minimal()+ theme(legend.title=element_blank())+ 
  scale_color_manual(values=c("lightcoral", "mediumseagreen"))

Tcell_3ind %>%
  ggplot(aes(y=Suppressor_Lymph_Cell, x=Total_Lymph_Cell, color = factor(Group), label = Group)) + 
  geom_point()+ theme_minimal()+ theme(legend.title=element_blank())+ 
  scale_color_manual(values=c("lightcoral", "mediumseagreen"))

Tcell_3ind %>%
  ggplot(aes(y=Suppressor_Lymph_Cell, x=Helper_Lymph_Cell, color = factor(Group), label = Group)) + 
  geom_point()+ theme_minimal()+ theme(legend.title=element_blank())+ 
  scale_color_manual(values=c("lightcoral", "mediumseagreen"))

Produce Figure5 a)

Tcell_pca = clara(Tcell_3ind_scaled, 2)

autoplot(Tcell_pca,loadings=TRUE, loadings.label = TRUE, 
         loadings.label.colour = "lightblue4", loadings.label.size  = 5, 
         loadings.colour = "lightblue4") +
  scale_color_manual(name="", labels=c("",""), breaks = c("1", "2"), 
                     values=c("peachpuff", "peachpuff")) +
  theme(legend.text = element_text(size = 16)) + theme_minimal()

Produce Figure5 b)

fviz_cluster(list(data = Tcell_3ind_scaled, 
                  cluster=as.factor(Initial_combined$Group)), geom="point") + 
  scale_fill_manual(values=c("lightcoral", "mediumseagreen")) + 
  theme_minimal()+xlab("")+ylab("")+ggtitle("")+theme(legend.title = element_blank())

save(Tcell_3ind,file="C:/Users/mindy/Dropbox/Novel Corona Virus/Paper draft/COVID_data/WebApp.RData")