#GISAID data analysis and hierarchical clustering of SARSCOV2 events
# January GISAID export: read the tab-separated file, store Lineage as a
# factor, and put the rows in ascending Collection.date order.
gisaid_hcov.Jan <- read.delim("C:/Users/SIDDIK SARKAR/Downloads/gisaid_hcov-Jan.tsv")
gisaid_hcov.Jan$Lineage <- factor(gisaid_hcov.Jan$Lineage)
# FALSE is spelled out: the shorthand `F` is an ordinary variable that can be
# reassigned, which would silently flip the sort direction.
o <- order(gisaid_hcov.Jan$Collection.date, decreasing = FALSE)
gisaid_hcov.Jan <- gisaid_hcov.Jan[o, ]
# Number of sequences per lineage in the January file.
table(gisaid_hcov.Jan$Lineage)
##
## A A.23.1 AY.23 AY.4 AY.5 B B.1
## 2 3 2 5 1 4 187
## B.1.1 B.1.1.10 B.1.1.101 B.1.1.141 B.1.1.174 B.1.1.194 B.1.1.196
## 106 2 4 1 3 1 1
## B.1.1.200 B.1.1.214 B.1.1.216 B.1.1.25 B.1.1.254 B.1.1.306 B.1.1.307
## 1 1 88 1 1 45 1
## B.1.1.311 B.1.1.317 B.1.1.326 B.1.1.353 B.1.1.354 B.1.1.355 B.1.1.364
## 1 1 16 1 17 2 1
## B.1.1.416 B.1.1.44 B.1.1.452 B.1.1.46 B.1.1.526 B.1.1.7 B.1.1.97
## 1 4 1 2 46 54 1
## B.1.170 B.1.177 B.1.177.4 B.1.177.7 B.1.184 B.1.189 B.1.2
## 1 2 1 1 2 1 3
## B.1.243 B.1.258.20 B.1.333 B.1.349 B.1.36 B.1.36.10 B.1.36.17
## 7 1 9 1 156 5 7
## B.1.36.18 B.1.36.19 B.1.36.21 B.1.36.22 B.1.36.29 B.1.36.35 B.1.36.8
## 5 1 1 4 214 1 29
## B.1.438 B.1.453 B.1.456 B.1.459 B.1.468 B.1.470 B.1.476
## 1 1 15 1 2 1 2
## B.1.524 B.1.533 B.1.537 B.1.538 B.1.540 B.1.551 B.1.560
## 2 3 7 8 4 1 26
## B.1.575 B.1.602 B.1.609 B.1.617.1 B.1.617.2 B.1.617.3 B.1.618
## 1 1 3 25 9 1 5
## B.1.94 B.10 B.6 B.6.6 B.60 C.1 L.3
## 2 1 1 1 2 1 2
## None R.1
## 38 1
# Directory holding the monthly GISAID exports; change it here only.
gisaid_dir <- "C:/Users/SIDDIK SARKAR/Downloads"

# Read one GISAID .tsv export from `gisaid_dir` and store Lineage as a factor.
#   file_name: file name (not full path) of the export inside `gisaid_dir`.
#   Returns the data frame in file order.
read_gisaid_export <- function(file_name) {
  export <- read.delim(file.path(gisaid_dir, file_name))
  export$Lineage <- factor(export$Lineage)
  export
}

# Return `export` with its rows in ascending Collection.date order.
sort_by_collection_date <- function(export) {
  export[order(export$Collection.date, decreasing = FALSE), ]
}

# NOTE: the previous version overwrote a scratch index vector `o` after every
# file; the helpers above keep that index local, so `o` is not touched here.
gisaid_hcov.Feb <- sort_by_collection_date(read_gisaid_export("gisaid_hcov-Feb.tsv"))
gisaid_hcov.March <- sort_by_collection_date(read_gisaid_export("gisaid_hcov-March.tsv"))

# April is read from two files; they are stacked first and sorted afterwards.
gisaid_hcov.April.1 <- read_gisaid_export("gisaid_hcov-April.1.tsv")
gisaid_hcov.April.2 <- read_gisaid_export("gisaid_hcov-April.2.tsv")
gisaid_hcov.April <- sort_by_collection_date(
  rbind.data.frame(gisaid_hcov.April.1, gisaid_hcov.April.2)
)

gisaid_hcov.May <- sort_by_collection_date(read_gisaid_export("gisaid_hcov-May.tsv"))
gisaid_hcov.June <- sort_by_collection_date(read_gisaid_export("gisaid_hcov-June.tsv"))
gisaid_hcov.July <- sort_by_collection_date(read_gisaid_export("gisaid_hcov-July.tsv"))
gisaid_hcov.August2 <- sort_by_collection_date(read_gisaid_export("gisaid_hcov_August_2.tsv"))
gisaid_hcov.September2 <- sort_by_collection_date(read_gisaid_export("gisaid_hcov_September_2.tsv"))

# Stack January through September into a single data frame.
gisaid_hcov.all <- rbind.data.frame(
  gisaid_hcov.Jan, gisaid_hcov.Feb, gisaid_hcov.March, gisaid_hcov.April,
  gisaid_hcov.May, gisaid_hcov.June, gisaid_hcov.July, gisaid_hcov.August2,
  gisaid_hcov.September2
)
# Month code = characters 6-7 of Collection.date, stored as a factor.
gisaid_hcov.all$Collection.month <- factor(
  substr(gisaid_hcov.all$Collection.date, 6, 7)
)

# Drop sequences whose lineage is "None". which() ignores NA comparisons, so a
# missing Lineage is dropped instead of becoming an all-NA row (which is what
# plain logical indexing produces).
gisaid_hcov.all <- gisaid_hcov.all[which(gisaid_hcov.all$Lineage != "None"), ]

# Frequency of every lineage, most frequent first.
y <- data.frame(table(gisaid_hcov.all$Lineage))
y <- y[order(y$Freq, decreasing = TRUE), ]
# Row subsetting does not drop the unused "None" factor level, so table()
# still reports it; remove that row.
y <- y[y$Var1 != "None", ]
# Share of each lineage among all remaining sequences, in percent.
y$percentage <- y$Freq / sum(y$Freq) * 100
# Top 15 lineages; head() does not pad with NA rows when fewer than 15 exist.
head(y, 15)
## Var1 Freq percentage
## 75 B.1.617.2 18764 42.1530305
## 74 B.1.617.1 4478 10.0597565
## 4 AY.4 3428 7.7009480
## 34 B.1.1.7 3213 7.2179539
## 7 B.1 2640 5.9307184
## 91 AY.12 1016 2.2824280
## 3 AY.23 949 2.1319136
## 54 B.1.36.29 660 1.4826796
## 8 B.1.1 508 1.1412140
## 47 B.1.36 488 1.0962843
## 219 AY.102 440 0.9884531
## 255 AY.43 404 0.9075796
## 146 AY.16 401 0.9008402
## 205 AY.26 361 0.8109808
## 241 AY.127 343 0.7705441
#write.csv(y, file= "SARS-CoV2 strains_Jan_Sept_2021_India.csv")
# Add one indicator column per lineage of interest: a factor that is 1 where
# Lineage equals the lineage name and 0 otherwise. The B.1 column used to be
# assigned twice with the same expression; each column is now created exactly
# once, in the same column order as before.
for (lineage_name in c("B.1.617.2", "B.1", "B.1.617.1", "AY.4", "B.1.1.7",
                       "AY.12", "AY.23", "AY.33", "B.1.36.29", "B.1.1",
                       "B.1.36")) {
  gisaid_hcov.all[[lineage_name]] <- factor(
    ifelse(gisaid_hcov.all$Lineage == lineage_name, 1, 0)
  )
}
rm(lineage_name)  # do not leave the loop variable behind in the workspace

# Indicator vs. collection month (same three plots, same order as before).
plot(factor(gisaid_hcov.all$B.1.617.2) ~ gisaid_hcov.all$Collection.month)

plot(gisaid_hcov.all$B.1.617.1 ~ gisaid_hcov.all$Collection.month)

plot(gisaid_hcov.all$B.1.617.2 ~ gisaid_hcov.all$Collection.month)

library(RColorBrewer)
# Draw the overview chart of all RColorBrewer palettes.
display.brewer.all()

# Ten colours from the "Spectral" palette, reused by the plots below.
my.colors <- brewer.pal(n = 10, name = "Spectral")
# Monthly share (%) of B.1.617.2: for every month code, the number of rows
# flagged 1 in the B.1.617.2 indicator column divided by the number of rows
# collected in that month, times 100. Names of the result are the month labels.
B.1.617.2 <- vapply(
  c("Jan" = "01", "Feb" = "02", "March" = "03", "April" = "04", "May" = "05",
    "June" = "06", "July" = "07", "August" = "08", "Sept" = "09"),
  function(month_code) {
    in_month <- gisaid_hcov.all$Collection.month == month_code
    flagged <- gisaid_hcov.all$B.1.617.2 == 1
    sum(flagged & in_month) / sum(in_month) * 100
  },
  numeric(1)
)
barplot(B.1.617.2, col = my.colors[1:9])

# Monthly share (%) of B.1.617.1: rows flagged 1 in the B.1.617.1 indicator
# column within a month, divided by all rows of that month, times 100.
B.1.617.1 <- vapply(
  c("Jan" = "01", "Feb" = "02", "March" = "03", "April" = "04", "May" = "05",
    "June" = "06", "July" = "07", "August" = "08", "Sept" = "09"),
  function(month_code) {
    in_month <- gisaid_hcov.all$Collection.month == month_code
    flagged <- gisaid_hcov.all$B.1.617.1 == 1
    sum(flagged & in_month) / sum(in_month) * 100
  },
  numeric(1)
)
barplot(B.1.617.1, col = my.colors[1:9])

# Monthly share (%) of B.1: rows flagged 1 in the B.1 indicator column within
# a month, divided by all rows of that month, times 100.
B.1 <- vapply(
  c("Jan" = "01", "Feb" = "02", "March" = "03", "April" = "04", "May" = "05",
    "June" = "06", "July" = "07", "August" = "08", "Sept" = "09"),
  function(month_code) {
    in_month <- gisaid_hcov.all$Collection.month == month_code
    flagged <- gisaid_hcov.all$B.1 == 1
    sum(flagged & in_month) / sum(in_month) * 100
  },
  numeric(1)
)
barplot(B.1, col = my.colors[1:9])

# Monthly share (%) of B.1.36: rows flagged 1 in the B.1.36 indicator column
# within a month, divided by all rows of that month, times 100.
B.1.36 <- vapply(
  c("Jan" = "01", "Feb" = "02", "March" = "03", "April" = "04", "May" = "05",
    "June" = "06", "July" = "07", "August" = "08", "Sept" = "09"),
  function(month_code) {
    in_month <- gisaid_hcov.all$Collection.month == month_code
    flagged <- gisaid_hcov.all$B.1.36 == 1
    sum(flagged & in_month) / sum(in_month) * 100
  },
  numeric(1)
)
barplot(B.1.36, col = my.colors[1:9])

# Monthly share (%) of B.1.1.7: rows flagged 1 in the B.1.1.7 indicator column
# within a month, divided by all rows of that month, times 100.
B.1.1.7 <- vapply(
  c("Jan" = "01", "Feb" = "02", "March" = "03", "April" = "04", "May" = "05",
    "June" = "06", "July" = "07", "August" = "08", "Sept" = "09"),
  function(month_code) {
    in_month <- gisaid_hcov.all$Collection.month == month_code
    flagged <- gisaid_hcov.all$B.1.1.7 == 1
    sum(flagged & in_month) / sum(in_month) * 100
  },
  numeric(1)
)
barplot(B.1.1.7, col = my.colors[1:9])

# Monthly share (%) of B.1.36.29: rows flagged 1 in the B.1.36.29 indicator
# column within a month, divided by all rows of that month, times 100.
B.1.36.29 <- vapply(
  c("Jan" = "01", "Feb" = "02", "March" = "03", "April" = "04", "May" = "05",
    "June" = "06", "July" = "07", "August" = "08", "Sept" = "09"),
  function(month_code) {
    in_month <- gisaid_hcov.all$Collection.month == month_code
    flagged <- gisaid_hcov.all$B.1.36.29 == 1
    sum(flagged & in_month) / sum(in_month) * 100
  },
  numeric(1)
)
barplot(B.1.36.29, col = my.colors[1:9])

# Monthly share (%) of AY.4: rows flagged 1 in the AY.4 indicator column
# within a month, divided by all rows of that month, times 100.
AY.4 <- vapply(
  c("Jan" = "01", "Feb" = "02", "March" = "03", "April" = "04", "May" = "05",
    "June" = "06", "July" = "07", "August" = "08", "Sept" = "09"),
  function(month_code) {
    in_month <- gisaid_hcov.all$Collection.month == month_code
    flagged <- gisaid_hcov.all$AY.4 == 1
    sum(flagged & in_month) / sum(in_month) * 100
  },
  numeric(1)
)
barplot(AY.4, col = my.colors[1:9])

# Monthly share (%) of AY.12: rows flagged 1 in the AY.12 indicator column
# within a month, divided by all rows of that month, times 100.
AY.12 <- vapply(
  c("Jan" = "01", "Feb" = "02", "March" = "03", "April" = "04", "May" = "05",
    "June" = "06", "July" = "07", "August" = "08", "Sept" = "09"),
  function(month_code) {
    in_month <- gisaid_hcov.all$Collection.month == month_code
    flagged <- gisaid_hcov.all$AY.12 == 1
    sum(flagged & in_month) / sum(in_month) * 100
  },
  numeric(1)
)
barplot(AY.12, col = my.colors[1:9])

# List the columns of the combined data frame (metadata plus indicator columns).
names(gisaid_hcov.all)
## [1] "Virus.name" "Accession.ID"
## [3] "Collection.date" "Location"
## [5] "Host" "Additional.location.information"
## [7] "Sampling.strategy" "Gender"
## [9] "Patient.age" "Patient.status"
## [11] "Last.vaccinated" "Passage"
## [13] "Specimen" "Additional.host.information"
## [15] "Lineage" "Clade"
## [17] "AA.Substitutions" "Collection.month"
## [19] "B.1.617.2" "B.1"
## [21] "B.1.617.1" "AY.4"
## [23] "B.1.1.7" "AY.12"
## [25] "AY.23" "AY.33"
## [27] "B.1.36.29" "B.1.1"
## [29] "B.1.36"
# Monthly share (%) of AY.23: rows flagged 1 in the AY.23 indicator column
# within a month, divided by all rows of that month, times 100.
AY.23 <- vapply(
  c("Jan" = "01", "Feb" = "02", "March" = "03", "April" = "04", "May" = "05",
    "June" = "06", "July" = "07", "August" = "08", "Sept" = "09"),
  function(month_code) {
    in_month <- gisaid_hcov.all$Collection.month == month_code
    flagged <- gisaid_hcov.all$AY.23 == 1
    sum(flagged & in_month) / sum(in_month) * 100
  },
  numeric(1)
)
barplot(AY.23, col = my.colors[1:9])

# Monthly share (%) of AY.33: rows flagged 1 in the AY.33 indicator column
# within a month, divided by all rows of that month, times 100.
AY.33 <- vapply(
  c("Jan" = "01", "Feb" = "02", "March" = "03", "April" = "04", "May" = "05",
    "June" = "06", "July" = "07", "August" = "08", "Sept" = "09"),
  function(month_code) {
    in_month <- gisaid_hcov.all$Collection.month == month_code
    flagged <- gisaid_hcov.all$AY.33 == 1
    sum(flagged & in_month) / sum(in_month) * 100
  },
  numeric(1)
)
barplot(AY.33, col = my.colors[1:9])

# Monthly share (%) of B.1.1: rows flagged 1 in the B.1.1 indicator column
# within a month, divided by all rows of that month, times 100.
B.1.1 <- vapply(
  c("Jan" = "01", "Feb" = "02", "March" = "03", "April" = "04", "May" = "05",
    "June" = "06", "July" = "07", "August" = "08", "Sept" = "09"),
  function(month_code) {
    in_month <- gisaid_hcov.all$Collection.month == month_code
    flagged <- gisaid_hcov.all$B.1.1 == 1
    sum(flagged & in_month) / sum(in_month) * 100
  },
  numeric(1)
)
barplot(B.1.1, col = my.colors[1:9])

# Monthly share (%) of B.1.36: rows flagged 1 in the B.1.36 indicator column
# within a month, divided by all rows of that month, times 100.
# NOTE(review): B.1.36 is already computed and plotted earlier in this script
# with the same expression; this recomputes and redraws it.
B.1.36 <- vapply(
  c("Jan" = "01", "Feb" = "02", "March" = "03", "April" = "04", "May" = "05",
    "June" = "06", "July" = "07", "August" = "08", "Sept" = "09"),
  function(month_code) {
    in_month <- gisaid_hcov.all$Collection.month == month_code
    flagged <- gisaid_hcov.all$B.1.36 == 1
    sum(flagged & in_month) / sum(in_month) * 100
  },
  numeric(1)
)
barplot(B.1.36, col = my.colors[1:9])

# Print the complete lineage frequency table (Var1, Freq, percentage).
y
## Var1 Freq percentage
## 75 B.1.617.2 18764 42.153030507
## 74 B.1.617.1 4478 10.059756481
## 4 AY.4 3428 7.700948016
## 34 B.1.1.7 3213 7.217953902
## 7 B.1 2640 5.930718426
## 91 AY.12 1016 2.282428000
## 3 AY.23 949 2.131913555
## 54 B.1.36.29 660 1.482679606
## 8 B.1.1 508 1.141214000
## 47 B.1.36 488 1.096284315
## 219 AY.102 440 0.988453071
## 255 AY.43 404 0.907579638
## 146 AY.16 401 0.900840185
## 205 AY.26 361 0.810980815
## 241 AY.127 343 0.770544098
## 20 B.1.1.306 293 0.658219886
## 17 B.1.1.216 278 0.624522622
## 92 AY.20 267 0.599811295
## 76 B.1.617.3 236 0.530170284
## 45 B.1.333 211 0.474008177
## 132 B.1.525 209 0.469515209
## 239 AY.125 208 0.467268724
## 236 AY.122 205 0.460529272
## 276 AY.61 197 0.442557398
## 220 AY.103 177 0.397627713
## 115 B.1.351 176 0.395381228
## 77 B.1.618 164 0.368423417
## 234 AY.120 159 0.357190996
## 5 AY.5 153 0.343712091
## 6 B 141 0.316754280
## 256 AY.44 132 0.296535921
## 149 AY.25 130 0.292042953
## 187 AY.7.1 124 0.278564047
## 43 B.1.243 122 0.274071079
## 33 B.1.1.526 118 0.265085142
## 267 AY.50 106 0.238127331
## 223 AY.106 104 0.233634362
## 56 B.1.36.8 100 0.224648425
## 225 AY.108 81 0.181965224
## 59 B.1.456 80 0.179718740
## 237 AY.123 79 0.177472256
## 90 AY.10 69 0.155007413
## 70 B.1.560 68 0.152760929
## 147 AY.19 64 0.143774992
## 184 AY.15 58 0.130296087
## 26 B.1.1.354 57 0.128049602
## 148 AY.24 55 0.123556634
## 60 B.1.459 53 0.119063665
## 107 B.1.153 51 0.114570697
## 180 AY.1 51 0.114570697
## 67 B.1.538 43 0.096598823
## 65 B.1.533 40 0.089859370
## 271 AY.55 38 0.085366402
## 53 B.1.36.22 35 0.078626949
## 71 B.1.575 35 0.078626949
## 139 B.1.628 34 0.076380465
## 240 AY.126 33 0.074133980
## 103 B.1.110 32 0.071887496
## 284 AY.76 30 0.067394528
## 1 A 29 0.065148043
## 24 B.1.1.326 29 0.065148043
## 49 B.1.36.17 29 0.065148043
## 116 B.1.351.3 28 0.062901559
## 248 AY.37 26 0.058408591
## 186 AY.21 25 0.056162106
## 206 AY.27 25 0.056162106
## 245 AY.33 24 0.053915622
## 257 AY.45 24 0.053915622
## 183 AY.14 23 0.051669138
## 249 AY.39 23 0.051669138
## 69 B.1.551 22 0.049422654
## 209 AY.32 22 0.049422654
## 272 AY.56 21 0.047176169
## 224 AY.107 20 0.044929685
## 263 AY.48 20 0.044929685
## 198 AY.22 18 0.040436717
## 48 B.1.36.10 17 0.038190232
## 286 AY.78 17 0.038190232
## 292 AY.86 17 0.038190232
## 50 B.1.36.18 16 0.035943748
## 172 B.1.633 16 0.035943748
## 231 AY.117 16 0.035943748
## 66 B.1.537 15 0.033697264
## 73 B.1.609 15 0.033697264
## 258 AY.46 15 0.033697264
## 226 AY.111 14 0.031450780
## 235 AY.121 14 0.031450780
## 283 AY.75 14 0.031450780
## 2 A.23.1 13 0.029204295
## 68 B.1.540 13 0.029204295
## 99 B.1.1.318 13 0.029204295
## 182 AY.13 13 0.029204295
## 213 B.1.623 13 0.029204295
## 290 AY.84 13 0.029204295
## 93 AY.7.2 12 0.026957811
## 42 B.1.2 11 0.024711327
## 171 B.1.627 11 0.024711327
## 199 AY.6 11 0.024711327
## 297 AY.92 11 0.024711327
## 55 B.1.36.35 10 0.022464843
## 142 Q.1 10 0.022464843
## 232 AY.118 10 0.022464843
## 254 AY.42 10 0.022464843
## 281 AY.70 10 0.022464843
## 302 AY.98.1 10 0.022464843
## 37 B.1.177 9 0.020218358
## 185 AY.17 9 0.020218358
## 274 AY.59 9 0.020218358
## 301 AY.98 9 0.020218358
## 12 B.1.1.174 8 0.017971874
## 88 A.29 8 0.017971874
## 188 AY.9 8 0.017971874
## 218 AY.100 8 0.017971874
## 262 AY.47 8 0.017971874
## 289 AY.83 8 0.017971874
## 295 AY.89 8 0.017971874
## 61 B.1.468 7 0.015725390
## 80 B.6 7 0.015725390
## 82 B.60 7 0.015725390
## 181 AY.11 7 0.015725390
## 278 AY.63 7 0.015725390
## 293 AY.87 7 0.015725390
## 113 B.1.222 6 0.013478906
## 140 C.36 6 0.013478906
## 150 B.1.1.189 6 0.013478906
## 208 AY.3 6 0.013478906
## 277 AY.62 6 0.013478906
## 279 AY.64 6 0.013478906
## 285 AY.77 6 0.013478906
## 30 B.1.1.44 5 0.011232421
## 40 B.1.184 5 0.011232421
## 120 B.1.36.33 5 0.011232421
## 154 B.1.1.528 5 0.011232421
## 210 AY.7 5 0.011232421
## 250 AY.39.1 5 0.011232421
## 266 AY.5.4 5 0.011232421
## 269 AY.53 5 0.011232421
## 294 AY.88 5 0.011232421
## 10 B.1.1.101 4 0.008985937
## 51 B.1.36.19 4 0.008985937
## 83 C.1 4 0.008985937
## 119 B.1.36.31 4 0.008985937
## 165 B.1.465 4 0.008985937
## 170 B.1.596 4 0.008985937
## 214 AY.29 4 0.008985937
## 233 AY.119 4 0.008985937
## 246 AY.35 4 0.008985937
## 251 AY.4.2.3 4 0.008985937
## 252 AY.4.4 4 0.008985937
## 259 AY.46.2 4 0.008985937
## 260 AY.46.4 4 0.008985937
## 282 AY.73 4 0.008985937
## 287 AY.79 4 0.008985937
## 291 AY.85 4 0.008985937
## 18 B.1.1.25 3 0.006739453
## 101 B.1.1.419 3 0.006739453
## 108 B.1.160 3 0.006739453
## 110 B.1.210 3 0.006739453
## 112 B.1.214.2 3 0.006739453
## 121 B.1.36.9 3 0.006739453
## 124 B.1.398 3 0.006739453
## 137 B.1.582 3 0.006739453
## 144 A.27 3 0.006739453
## 174 C.36.3 3 0.006739453
## 176 P.1 3 0.006739453
## 177 Q.6 3 0.006739453
## 178 Q.8 3 0.006739453
## 207 AY.28 3 0.006739453
## 227 AY.112 3 0.006739453
## 229 AY.114 3 0.006739453
## 243 AY.18 3 0.006739453
## 253 AY.4.6 3 0.006739453
## 265 AY.5.3 3 0.006739453
## 268 AY.51 3 0.006739453
## 270 AY.54 3 0.006739453
## 273 AY.58 3 0.006739453
## 298 AY.93 3 0.006739453
## 309 AY.9.2 3 0.006739453
## 310 AY.90 3 0.006739453
## 311 AY.99 3 0.006739453
## 9 B.1.1.10 2 0.004492969
## 13 B.1.1.194 2 0.004492969
## 15 B.1.1.200 2 0.004492969
## 21 B.1.1.307 2 0.004492969
## 25 B.1.1.353 2 0.004492969
## 27 B.1.1.355 2 0.004492969
## 32 B.1.1.46 2 0.004492969
## 39 B.1.177.7 2 0.004492969
## 44 B.1.258.20 2 0.004492969
## 62 B.1.470 2 0.004492969
## 63 B.1.476 2 0.004492969
## 64 B.1.524 2 0.004492969
## 72 B.1.602 2 0.004492969
## 78 B.1.94 2 0.004492969
## 81 B.6.6 2 0.004492969
## 84 L.3 2 0.004492969
## 86 R.1 2 0.004492969
## 97 B.1.1.274 2 0.004492969
## 111 B.1.214 2 0.004492969
## 117 B.1.36.16 2 0.004492969
## 118 B.1.36.24 2 0.004492969
## 128 B.1.466.1 2 0.004492969
## 129 B.1.480 2 0.004492969
## 133 B.1.564 2 0.004492969
## 134 B.1.566 2 0.004492969
## 145 AE.2 2 0.004492969
## 152 B.1.1.37 2 0.004492969
## 157 B.1.195 2 0.004492969
## 159 B.1.351.2 2 0.004492969
## 166 B.1.466.2 2 0.004492969
## 191 B.1.305 2 0.004492969
## 192 B.1.362 2 0.004492969
## 195 C.38 2 0.004492969
## 200 B.1.1.1 2 0.004492969
## 221 AY.104 2 0.004492969
## 222 AY.105 2 0.004492969
## 228 AY.113 2 0.004492969
## 238 AY.124 2 0.004492969
## 242 AY.16.1 2 0.004492969
## 288 AY.80 2 0.004492969
## 303 AY.101 2 0.004492969
## 307 AY.46.1 2 0.004492969
## 312 AY.99.2 2 0.004492969
## 11 B.1.1.141 1 0.002246484
## 14 B.1.1.196 1 0.002246484
## 16 B.1.1.214 1 0.002246484
## 19 B.1.1.254 1 0.002246484
## 22 B.1.1.311 1 0.002246484
## 23 B.1.1.317 1 0.002246484
## 28 B.1.1.364 1 0.002246484
## 29 B.1.1.416 1 0.002246484
## 31 B.1.1.452 1 0.002246484
## 35 B.1.1.97 1 0.002246484
## 36 B.1.170 1 0.002246484
## 38 B.1.177.4 1 0.002246484
## 41 B.1.189 1 0.002246484
## 46 B.1.349 1 0.002246484
## 52 B.1.36.21 1 0.002246484
## 57 B.1.438 1 0.002246484
## 58 B.1.453 1 0.002246484
## 79 B.10 1 0.002246484
## 87 A.21 1 0.002246484
## 89 AE.7 1 0.002246484
## 94 B.1.1.135 1 0.002246484
## 95 B.1.1.17 1 0.002246484
## 96 B.1.1.201 1 0.002246484
## 98 B.1.1.28 1 0.002246484
## 100 B.1.1.351 1 0.002246484
## 102 B.1.1.57 1 0.002246484
## 104 B.1.111 1 0.002246484
## 105 B.1.12 1 0.002246484
## 106 B.1.145 1 0.002246484
## 109 B.1.164 1 0.002246484
## 114 B.1.289 1 0.002246484
## 122 B.1.371 1 0.002246484
## 123 B.1.383 1 0.002246484
## 125 B.1.442 1 0.002246484
## 126 B.1.460 1 0.002246484
## 127 B.1.466 1 0.002246484
## 130 B.1.520 1 0.002246484
## 131 B.1.523 1 0.002246484
## 135 B.1.569 1 0.002246484
## 136 B.1.576 1 0.002246484
## 138 B.1.617 1 0.002246484
## 141 P.2 1 0.002246484
## 143 Q.4 1 0.002246484
## 151 B.1.1.365 1 0.002246484
## 153 B.1.1.525 1 0.002246484
## 155 B.1.143 1 0.002246484
## 156 B.1.159 1 0.002246484
## 158 B.1.229 1 0.002246484
## 160 B.1.382 1 0.002246484
## 161 B.1.397 1 0.002246484
## 162 B.1.400.1 1 0.002246484
## 163 B.1.413 1 0.002246484
## 164 B.1.429 1 0.002246484
## 167 B.1.509 1 0.002246484
## 168 B.1.526 1 0.002246484
## 169 B.1.575.2 1 0.002246484
## 173 B.1.636 1 0.002246484
## 175 N.10 1 0.002246484
## 179 AE.4 1 0.002246484
## 189 B.1.1.121 1 0.002246484
## 190 B.1.214.3 1 0.002246484
## 193 B.1.441 1 0.002246484
## 194 C.37 1 0.002246484
## 196 A.5 1 0.002246484
## 197 AE.3 1 0.002246484
## 201 B.1.1.372 1 0.002246484
## 202 B.1.1.8 1 0.002246484
## 203 B.1.302 1 0.002246484
## 204 B.1.577 1 0.002246484
## 211 B.1.281 1 0.002246484
## 212 B.1.36.7 1 0.002246484
## 215 AY.30 1 0.002246484
## 216 B.1.395 1 0.002246484
## 217 B.49 1 0.002246484
## 230 AY.115 1 0.002246484
## 244 AY.31 1 0.002246484
## 247 AY.36 1 0.002246484
## 261 AY.46.6 1 0.002246484
## 264 AY.49 1 0.002246484
## 275 AY.60 1 0.002246484
## 280 AY.65 1 0.002246484
## 296 AY.91 1 0.002246484
## 299 AY.95 1 0.002246484
## 300 AY.96 1 0.002246484
## 304 AY.119.1 1 0.002246484
## 305 AY.120.1 1 0.002246484
## 306 AY.120.2.1 1 0.002246484
## 308 AY.71 1 0.002246484
# List the columns of the combined data frame once more.
names(gisaid_hcov.all)
## [1] "Virus.name" "Accession.ID"
## [3] "Collection.date" "Location"
## [5] "Host" "Additional.location.information"
## [7] "Sampling.strategy" "Gender"
## [9] "Patient.age" "Patient.status"
## [11] "Last.vaccinated" "Passage"
## [13] "Specimen" "Additional.host.information"
## [15] "Lineage" "Clade"
## [17] "AA.Substitutions" "Collection.month"
## [19] "B.1.617.2" "B.1"
## [21] "B.1.617.1" "AY.4"
## [23] "B.1.1.7" "AY.12"
## [25] "AY.23" "AY.33"
## [27] "B.1.36.29" "B.1.1"
## [29] "B.1.36"
# Back to a single plot per page.
par(mfrow = c(1, 1))

# One column per lineage, one row per month: combine the ten monthly-share
# vectors into a data frame (column names equal the vector names).
Lineage <- data.frame(
  B.1.617.2 = B.1.617.2,
  B.1.617.1 = B.1.617.1,
  AY.4 = AY.4,
  B.1.1.7 = B.1.1.7,
  B.1 = B.1,
  AY.12 = AY.12,
  AY.23 = AY.23,
  B.1.36.29 = B.1.36.29,
  B.1.1 = B.1.1,
  B.1.36 = B.1.36
)
names(Lineage)
## [1] "B.1.617.2" "B.1.617.1" "AY.4" "B.1.1.7" "B.1" "AY.12"
## [7] "AY.23" "B.1.36.29" "B.1.1" "B.1.36"
# Monthly shares rounded to two decimals. The argument is spelled `digits`
# in full instead of relying on partial matching of `digit`.
Lineage_r <- round(Lineage, digits = 2)
#write.csv(Lineage_r, "Lineage distribution_JantoSept_India_2021.csv")
# Line chart of every tracked lineage over the nine months.
# B.1.617.2 (first column) opens the plot in black and fixes the axes.
plot(
  Lineage$B.1.617.2,
  type = "b", lwd = 2, col = "black",
  xlab = "Month 2021", ylab = "Percentage(%)",
  xlim = c(1, 9), xaxt = "n"
)
# Month labels come from the row names of the lineage table.
axis(side = 1, at = 1:9, labels = rownames(Lineage), las = 2)
# Columns 2-10 are overlaid in column order, each paired with the palette
# colour at the same position (my.colors[1] ... my.colors[9]).
invisible(Map(
  function(monthly_share, shade) {
    lines(monthly_share, type = "b", lwd = 2, col = shade)
  },
  Lineage[2:10],
  my.colors[1:9]
))
legend(
  "topleft",
  title = "Lineage",
  legend = colnames(Lineage)[1:10],
  col = c("black", my.colors[c(1:9)]),
  lty = 1,
  cex = 0.75
)

# 2 x 3 grid of bar charts, one panel per lineage, all sharing a 0-80 y range
# so the panels are directly comparable.
par(mfrow = c(2, 3))
invisible(Map(
  function(lineage_column, panel_title) {
    barplot(
      Lineage[[lineage_column]],
      main = panel_title,
      names.arg = rownames(Lineage),
      col = my.colors[1:9],
      ylim = c(0, 80),
      las = 2
    )
  },
  # Columns to draw, in panel order ...
  c("B.1.617.1", "B.1", "B.1.1.7", "B.1.617.2", "AY.4", "AY.12"),
  # ... and the title shown above each panel.
  c("B.1.617.1", "B.1", "B.1.1.7", "B.1.617.2", "AY.4", "AY.12 ")
))
# Pairwise correlation (cor() default method) between the lineage columns.
# Computed once and reused by both heatmaps below instead of being
# recalculated for every call.
lineage_cor <- cor(Lineage)
lineage_cor
## B.1.617.2 B.1.617.1 AY.4 B.1.1.7 B.1 AY.12
## B.1.617.2 1.0000000 -0.55150574 0.7627107 -0.6441639 -0.7945639 0.7597749
## B.1.617.1 -0.5515057 1.00000000 -0.3813487 0.9759761 0.3860769 -0.2298838
## AY.4 0.7627107 -0.38134870 1.0000000 -0.4244577 -0.3033932 0.9062736
## B.1.1.7 -0.6441639 0.97597606 -0.4244577 1.0000000 0.4314783 -0.3242978
## B.1 -0.7945639 0.38607695 -0.3033932 0.4314783 1.0000000 -0.2892071
## AY.12 0.7597749 -0.22988376 0.9062736 -0.3242978 -0.2892071 1.0000000
## AY.23 0.7126003 -0.15037961 0.8705798 -0.2531271 -0.2414937 0.9590670
## B.1.36.29 -0.7923956 0.05810885 -0.4418485 0.1717619 0.8316041 -0.4952632
## B.1.1 -0.8049458 0.13364746 -0.4308610 0.2384594 0.8519277 -0.4697462
## B.1.36 -0.8011952 0.06631746 -0.4381744 0.1674751 0.8722061 -0.4939294
## AY.23 B.1.36.29 B.1.1 B.1.36
## B.1.617.2 0.7126003 -0.79239563 -0.8049458 -0.80119523
## B.1.617.1 -0.1503796 0.05810885 0.1336475 0.06631746
## AY.4 0.8705798 -0.44184846 -0.4308610 -0.43817436
## B.1.1.7 -0.2531271 0.17176195 0.2384594 0.16747515
## B.1 -0.2414937 0.83160413 0.8519277 0.87220607
## AY.12 0.9590670 -0.49526324 -0.4697462 -0.49392941
## AY.23 1.0000000 -0.47259993 -0.4360809 -0.47210243
## B.1.36.29 -0.4725999 1.00000000 0.9941551 0.99270704
## B.1.1 -0.4360809 0.99415508 1.0000000 0.98535909
## B.1.36 -0.4721024 0.99270704 0.9853591 1.00000000
library(ggplot2)

# Correlation heatmap with the lineages in their original column order.
ggcorrplot::ggcorrplot(lineage_cor)

# Reset the base-graphics layout left at 2 x 3 by the bar charts.
par(mfrow = c(1, 1))
# Same heatmap with lineages reordered by hierarchical clustering.
# `outline.color` is written in full; the previous `outline.col` only worked
# through partial argument matching.
ggcorrplot::ggcorrplot(lineage_cor, hc.order = TRUE, outline.color = "white")

# Number of rows (sequenced samples) in the combined table.
total.samples <- nrow(gisaid_hcov.all) # samples analyzed
# Month-wise sample counts: for each two-digit month code, count the rows
# whose Collection.month equals it. Names of the result are the month labels.
total.samples_mw <- vapply(
  c(
    Jan = "01", Feb = "02", March = "03",
    April = "04", May = "05", June = "06",
    July = "07", August = "08", Sept = "09"
  ),
  function(month_code) sum(gisaid_hcov.all$Collection.month == month_code),
  integer(1)
)
# The month-wise sample counts double as the "reported cases" series.
covid.cases <- total.samples_mw
# Pair the monthly totals with the per-lineage count, obtained by scaling the
# lineage vector by the monthly total and dividing by 100.
covid.cases_B.1.617.2 <- setNames(
  cbind.data.frame(covid.cases, B.1.617.2 * total.samples_mw / 100),
  c("Reported_Cases", "Reported_B.1.617.2")
)
covid.cases_AY.4 <- setNames(
  cbind.data.frame(covid.cases, AY.4 * total.samples_mw / 100),
  c("Reported_Cases", "Reported_AY.4")
)
# Ratio of the two column maxima; used later to draw both series on one axis.
coeff <- with(
  covid.cases_B.1.617.2,
  max(Reported_Cases) / max(Reported_B.1.617.2)
)
# Base-graphics view of the monthly totals, labelled with the month names.
plot(
  covid.cases_B.1.617.2$Reported_Cases,
  type = "b", col = "black",
  xlab = "Month of 2021", ylab = "cases",
  xlim = c(1, 9), xaxt = "n"
)
axis(side = 1, at = 1:9, labels = rownames(covid.cases_B.1.617.2), las = 2)

library(ggplot2)

# Dual-axis loess trend of monthly totals (blue, left axis) against the
# monthly count of one lineage (red, right axis).
#
# cases_df          data frame whose first column holds the monthly totals and
#                   whose second column holds the lineage counts; its row
#                   names supply the month labels.
# coeff             factor that rescales the lineage counts onto the left
#                   axis; the right axis divides by it again.
# lineage_axis_name title of the right-hand axis.
#
# Returns the ggplot object, so a top-level call draws the figure.
plot_cases_vs_lineage <- function(cases_df, coeff, lineage_axis_name) {
  # One x position per row instead of a hard-coded 1:9.
  month_index <- seq_len(nrow(cases_df))
  plot_df <- data.frame(
    month = month_index,
    cases = cases_df[[1]],
    lineage_scaled = cases_df[[2]] * coeff
  )
  ggplot(data = plot_df, aes(x = month)) +
    geom_smooth(aes(y = cases), col = "blue", method = "loess") +
    geom_smooth(aes(y = lineage_scaled), col = "red", method = "loess") +
    scale_y_continuous(
      # Features of the first axis
      name = "COVID-19 cases/month",
      # Add a second axis that undoes the rescaling
      sec.axis = sec_axis(~ . / coeff, name = lineage_axis_name)
    ) +
    theme(
      axis.title.y.left = element_text(color = "blue"),
      axis.text.y.left = element_text(color = "blue"),
      axis.title.y.right = element_text(color = "red"),
      axis.text.y.right = element_text(color = "red"),
      axis.text.x.top = element_text()
    ) +
    scale_x_continuous(
      name = "Month of 2021",
      breaks = month_index,
      labels = substr(rownames(cases_df), 1, 3)
    )
}

#B.1.617.2
# The data frame and `coeff` for this lineage are built earlier in the script.
# The axis title previously read "B1.617.2" (missing dot).
plot_cases_vs_lineage(covid.cases_B.1.617.2, coeff, "Reported B.1.617.2/month")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

#AY.4
covid.cases_AY.4 <- cbind.data.frame(covid.cases, AY.4 * total.samples_mw / 100)
colnames(covid.cases_AY.4) <- c("Reported_Cases", "Reported_AY.4")
coeff <- max(covid.cases_AY.4$Reported_Cases) / max(covid.cases_AY.4$Reported_AY.4)
plot_cases_vs_lineage(covid.cases_AY.4, coeff, "Reported AY.4/month")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

#B.1.1
covid.cases_B.1.1 <- cbind.data.frame(covid.cases, B.1.1 * total.samples_mw / 100)
colnames(covid.cases_B.1.1) <- c("Reported_Cases", "Reported_B.1.1")
coeff <- max(covid.cases_B.1.1$Reported_Cases) / max(covid.cases_B.1.1$Reported_B.1.1)
plot_cases_vs_lineage(covid.cases_B.1.1, coeff, "Reported B.1.1/month")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

#B.1.617.1
covid.cases_B.1.617.1 <- cbind.data.frame(covid.cases, B.1.617.1 * total.samples_mw / 100)
colnames(covid.cases_B.1.617.1) <- c("Reported_Cases", "Reported_B.1.617.1")
coeff <- max(covid.cases_B.1.617.1$Reported_Cases) / max(covid.cases_B.1.617.1$Reported_B.1.617.1)
plot_cases_vs_lineage(covid.cases_B.1.617.1, coeff, "Reported B.1.617.1/month")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

# Top rows of the overall lineage frequency table.
head(y)
## Var1 Freq percentage
## 75 B.1.617.2 18764 42.153031
## 74 B.1.617.1 4478 10.059756
## 4 AY.4 3428 7.700948
## 34 B.1.1.7 3213 7.217954
## 7 B.1 2640 5.930718
## 91 AY.12 1016 2.282428
# Monthly share (%) of samples assigned to any AY sublineage.
# The previous version tested a column `Ay.x` that does not exist in
# gisaid_hcov.all for January (always 0) and the B.1.36.29 indicator for every
# other month, so it never measured AY lineages. Membership is now read
# directly from the Lineage column; the denominator is unchanged (all samples
# collected in that month).
Ay.x <- local({
  # TRUE for lineages named "AY.<something>"; the dot is escaped and the
  # pattern anchored so only genuine AY sublineages match.
  is_ay <- grepl("^AY\\.", gisaid_hcov.all$Lineage)
  vapply(
    c(
      Jan = "01", Feb = "02", March = "03",
      April = "04", May = "05", June = "06",
      July = "07", August = "08", Sept = "09"
    ),
    function(month_code) {
      in_month <- gisaid_hcov.all$Collection.month == month_code
      sum(is_ay & in_month) / sum(in_month) * 100
    },
    numeric(1)
  )
})
# Work on a copy so the relabelling below leaves gisaid_hcov.all untouched.
df <- gisaid_hcov.all
# Collapse every AY sublineage into the single label "AY.x". The pattern is
# anchored and the dot escaped, so only names starting with "AY." are
# rewritten (the old "AY.*" matched "AY" anywhere in a name).
df$Lineage <- gsub("^AY\\..*", "AY.x", df$Lineage)
# Frequency table of the relabelled lineages, most common first.
y2 <- data.frame(table(df$Lineage))
y2 <- y2[order(y2$Freq, decreasing = TRUE), ]
# Drop unassigned samples before computing shares, so the percentages are
# relative to samples with a lineage call.
y2 <- y2[y2$Var1 != "None", ]
y2$percentage <- y2$Freq / sum(y2$Freq) * 100
y2[1:15, ]
## Var1 Freq percentage
## 160 B.1.617.2 18764 42.1530305
## 11 AY.x 10589 23.7880217
## 159 B.1.617.1 4478 10.0597565
## 57 B.1.1.7 3213 7.2179539
## 13 B.1 2640 5.9307184
## 103 B.1.36.29 660 1.4826796
## 14 B.1.1 508 1.1412140
## 94 B.1.36 488 1.0962843
## 34 B.1.1.306 293 0.6582199
## 29 B.1.1.216 278 0.6245226
## 161 B.1.617.3 236 0.5301703
## 89 B.1.333 211 0.4740082
## 139 B.1.525 209 0.4695152
## 91 B.1.351 176 0.3953812
## 162 B.1.618 164 0.3684234
# 0/1 indicator (stored as a factor) marking rows relabelled to "AY.x".
df$AY.x <- factor(ifelse(df$Lineage == "AY.x", 1, 0))
# Monthly share (%) of AY.x samples: flagged rows collected in the month
# divided by all rows collected in that month.
AY.x <- vapply(
  c(
    Jan = "01", Feb = "02", March = "03",
    April = "04", May = "05", June = "06",
    July = "07", August = "08", Sept = "09"
  ),
  function(month_code) {
    in_month <- df$Collection.month == month_code
    sum(df$AY.x == 1 & in_month) / sum(in_month) * 100
  },
  numeric(1)
)
# One bar chart per series: pooled AY.x first, then B.1.617.2.
invisible(Map(
  function(monthly_share, panel_title) {
    barplot(monthly_share, col = my.colors[1:9], main = panel_title)
  },
  list(AY.x, B.1.617.2),
  c("AY.x", "B.1.617.2")
))

# Add the pooled AY.x series to the lineage table and compare it with
# B.1.617.2 on one line chart (solid black vs dashed blue).
Lineage$AY.x <- AY.x
plot(
  Lineage$B.1.617.2,
  type = "b", lwd = 2, col = "black",
  xlab = "Month 2021", ylab = "Percentage(%)",
  xlim = c(1, 9), xaxt = "n"
)
axis(side = 1, at = 1:9, labels = rownames(Lineage), las = 2)
lines(Lineage$AY.x, type = "b", col = "blue", lty = 2, lwd = 2)
legend(
  "topleft",
  legend = c("B.1.617.2", "AY.x"),
  col = c("black", "blue"),
  lwd = 2,
  lty = c(1, 2)
)

#AY.x
# Monthly totals next to the AY.x count (AY.x scaled by the monthly total and
# divided by 100).
covid.cases_AY.x <- setNames(
  cbind.data.frame(covid.cases, AY.x * total.samples_mw / 100),
  c("Reported_Cases", "Reported_AY.x")
)
# Ratio of the column maxima: stretches the AY.x curve onto the left axis.
coeff <- with(covid.cases_AY.x, max(Reported_Cases) / max(Reported_AY.x))
# Dual-axis loess trends: totals in blue (left axis), AY.x in red (right axis).
ggplot(covid.cases_AY.x, aes(x = 1:9)) +
  geom_smooth(mapping = aes(y = Reported_Cases), method = "loess", colour = "blue") +
  geom_smooth(mapping = aes(y = Reported_AY.x * coeff), method = "loess", colour = "red") +
  scale_x_continuous(
    name = "Month of 2021",
    breaks = 1:9,
    labels = substr(rownames(covid.cases_AY.x), 1, 3)
  ) +
  scale_y_continuous(
    # Left axis: monthly totals
    name = "COVID-19 cases/month",
    # Right axis: divide by coeff to read the AY.x counts back
    sec.axis = sec_axis(~ . / coeff, name = "Reported AY.x/month")
  ) +
  theme(
    axis.title.y.left = element_text(color = "blue"),
    axis.text.y.left = element_text(color = "blue"),
    axis.title.y.right = element_text(color = "red"),
    axis.text.y.right = element_text(color = "red"),
    axis.text.x.top = element_text()
  )
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
