#GISAID data analysis and hierarchical clustering of SARSCOV2 events


# Load the `gisaid_hcov-Jan.tsv` metadata export (tab-delimited), store the
# lineage column as a factor, and sort the rows by Collection.date in
# increasing order.
gisaid_hcov.Jan <- read.delim("C:/Users/SIDDIK SARKAR/Downloads/gisaid_hcov-Jan.tsv")
gisaid_hcov.Jan$Lineage <- factor(gisaid_hcov.Jan$Lineage)
# Spell out FALSE: `F` is an ordinary, reassignable variable.
o <- order(gisaid_hcov.Jan$Collection.date, decreasing = FALSE)
gisaid_hcov.Jan <- gisaid_hcov.Jan[o, ]

# Number of sequences per lineage in this file.
table(gisaid_hcov.Jan$Lineage)
## 
##          A     A.23.1      AY.23       AY.4       AY.5          B        B.1 
##          2          3          2          5          1          4        187 
##      B.1.1   B.1.1.10  B.1.1.101  B.1.1.141  B.1.1.174  B.1.1.194  B.1.1.196 
##        106          2          4          1          3          1          1 
##  B.1.1.200  B.1.1.214  B.1.1.216   B.1.1.25  B.1.1.254  B.1.1.306  B.1.1.307 
##          1          1         88          1          1         45          1 
##  B.1.1.311  B.1.1.317  B.1.1.326  B.1.1.353  B.1.1.354  B.1.1.355  B.1.1.364 
##          1          1         16          1         17          2          1 
##  B.1.1.416   B.1.1.44  B.1.1.452   B.1.1.46  B.1.1.526    B.1.1.7   B.1.1.97 
##          1          4          1          2         46         54          1 
##    B.1.170    B.1.177  B.1.177.4  B.1.177.7    B.1.184    B.1.189      B.1.2 
##          1          2          1          1          2          1          3 
##    B.1.243 B.1.258.20    B.1.333    B.1.349     B.1.36  B.1.36.10  B.1.36.17 
##          7          1          9          1        156          5          7 
##  B.1.36.18  B.1.36.19  B.1.36.21  B.1.36.22  B.1.36.29  B.1.36.35   B.1.36.8 
##          5          1          1          4        214          1         29 
##    B.1.438    B.1.453    B.1.456    B.1.459    B.1.468    B.1.470    B.1.476 
##          1          1         15          1          2          1          2 
##    B.1.524    B.1.533    B.1.537    B.1.538    B.1.540    B.1.551    B.1.560 
##          2          3          7          8          4          1         26 
##    B.1.575    B.1.602    B.1.609  B.1.617.1  B.1.617.2  B.1.617.3    B.1.618 
##          1          1          3         25          9          1          5 
##     B.1.94       B.10        B.6      B.6.6       B.60        C.1        L.3 
##          2          1          1          1          2          1          2 
##       None        R.1 
##         38          1
# Load the `gisaid_hcov-Feb.tsv` metadata export, store the lineage column as
# a factor, and sort the rows by Collection.date in increasing order.
gisaid_hcov.Feb <- read.delim("C:/Users/SIDDIK SARKAR/Downloads/gisaid_hcov-Feb.tsv")
gisaid_hcov.Feb$Lineage <- factor(gisaid_hcov.Feb$Lineage)
# Spell out FALSE: `F` is an ordinary, reassignable variable.
o <- order(gisaid_hcov.Feb$Collection.date, decreasing = FALSE)
gisaid_hcov.Feb <- gisaid_hcov.Feb[o, ]


# Load the `gisaid_hcov-March.tsv` metadata export, store the lineage column
# as a factor, and sort the rows by Collection.date in increasing order.
gisaid_hcov.March <- read.delim("C:/Users/SIDDIK SARKAR/Downloads/gisaid_hcov-March.tsv")
gisaid_hcov.March$Lineage <- factor(gisaid_hcov.March$Lineage)
# Spell out FALSE: `F` is an ordinary, reassignable variable.
o <- order(gisaid_hcov.March$Collection.date, decreasing = FALSE)
gisaid_hcov.March <- gisaid_hcov.March[o, ]

# The April export is split across two files. Load both, store the lineage
# column of each as a factor, stack them, and sort the combined rows by
# Collection.date in increasing order.
gisaid_hcov.April.1 <- read.delim("C:/Users/SIDDIK SARKAR/Downloads/gisaid_hcov-April.1.tsv")
gisaid_hcov.April.1$Lineage <- factor(gisaid_hcov.April.1$Lineage)
gisaid_hcov.April.2 <- read.delim("C:/Users/SIDDIK SARKAR/Downloads/gisaid_hcov-April.2.tsv")
gisaid_hcov.April.2$Lineage <- factor(gisaid_hcov.April.2$Lineage)
# The generic rbind() dispatches to the data-frame method; no need to call
# rbind.data.frame() directly.
gisaid_hcov.April <- rbind(gisaid_hcov.April.1, gisaid_hcov.April.2)
# Spell out FALSE: `F` is an ordinary, reassignable variable.
o <- order(gisaid_hcov.April$Collection.date, decreasing = FALSE)
gisaid_hcov.April <- gisaid_hcov.April[o, ]


# Load the `gisaid_hcov-May.tsv` metadata export, store the lineage column as
# a factor, and sort the rows by Collection.date in increasing order.
gisaid_hcov.May <- read.delim("C:/Users/SIDDIK SARKAR/Downloads/gisaid_hcov-May.tsv")
gisaid_hcov.May$Lineage <- factor(gisaid_hcov.May$Lineage)
# Spell out FALSE: `F` is an ordinary, reassignable variable.
o <- order(gisaid_hcov.May$Collection.date, decreasing = FALSE)
gisaid_hcov.May <- gisaid_hcov.May[o, ]

# Load the `gisaid_hcov-June.tsv` metadata export, store the lineage column as
# a factor, and sort the rows by Collection.date in increasing order.
gisaid_hcov.June <- read.delim("C:/Users/SIDDIK SARKAR/Downloads/gisaid_hcov-June.tsv")
gisaid_hcov.June$Lineage <- factor(gisaid_hcov.June$Lineage)
# Spell out FALSE: `F` is an ordinary, reassignable variable.
o <- order(gisaid_hcov.June$Collection.date, decreasing = FALSE)
gisaid_hcov.June <- gisaid_hcov.June[o, ]


# Load the `gisaid_hcov-July.tsv` metadata export, store the lineage column as
# a factor, and sort the rows by Collection.date in increasing order.
gisaid_hcov.July <- read.delim("C:/Users/SIDDIK SARKAR/Downloads/gisaid_hcov-July.tsv")
gisaid_hcov.July$Lineage <- factor(gisaid_hcov.July$Lineage)
# Spell out FALSE: `F` is an ordinary, reassignable variable.
o <- order(gisaid_hcov.July$Collection.date, decreasing = FALSE)
gisaid_hcov.July <- gisaid_hcov.July[o, ]


# Load the `gisaid_hcov_August_2.tsv` metadata export, store the lineage
# column as a factor, and sort the rows by Collection.date in increasing order.
gisaid_hcov.August2 <- read.delim("C:/Users/SIDDIK SARKAR/Downloads/gisaid_hcov_August_2.tsv")
gisaid_hcov.August2$Lineage <- factor(gisaid_hcov.August2$Lineage)
# Spell out FALSE: `F` is an ordinary, reassignable variable.
o <- order(gisaid_hcov.August2$Collection.date, decreasing = FALSE)
gisaid_hcov.August2 <- gisaid_hcov.August2[o, ]


# Load the `gisaid_hcov_September_2.tsv` metadata export, store the lineage
# column as a factor, and sort the rows by Collection.date in increasing order.
gisaid_hcov.September2 <- read.delim("C:/Users/SIDDIK SARKAR/Downloads/gisaid_hcov_September_2.tsv")
gisaid_hcov.September2$Lineage <- factor(gisaid_hcov.September2$Lineage)
# Spell out FALSE: `F` is an ordinary, reassignable variable.
o <- order(gisaid_hcov.September2$Collection.date, decreasing = FALSE)
gisaid_hcov.September2 <- gisaid_hcov.September2[o, ]







# Stack the nine monthly tables (January through September) into one data
# frame. rbind() merges the per-month Lineage factor levels.
gisaid_hcov.all <- rbind(
  gisaid_hcov.Jan, gisaid_hcov.Feb, gisaid_hcov.March, gisaid_hcov.April,
  gisaid_hcov.May, gisaid_hcov.June, gisaid_hcov.July, gisaid_hcov.August2,
  gisaid_hcov.September2
)

# Characters 6-7 of Collection.date are used as the collection month.
# NOTE(review): assumes "YYYY-MM-DD"-style dates; a partial date such as
# "2021" would give an empty month -- confirm against the exports.
gisaid_hcov.all$Collection.month <- factor(
  substr(gisaid_hcov.all$Collection.date, 6, 7)
)

# Drop sequences whose lineage is "None". which() is used instead of a bare
# logical subscript so that a missing (NA) lineage is dropped as well, rather
# than being turned into an all-NA row that would make every later monthly
# sum NA.
gisaid_hcov.all <- gisaid_hcov.all[which(gisaid_hcov.all$Lineage != "None"), ]

# Frequency table of lineages across all months: one row per lineage
# (Var1), its count (Freq) and its share of all counted sequences in percent,
# sorted from most to least frequent.
y <- data.frame(table(gisaid_hcov.all$Lineage))
y <- y[order(y$Freq, decreasing = TRUE), ]
# "None" survives as an unused factor level with a zero count; remove its row.
y <- y[y$Var1 != "None", ]
y$percentage <- y$Freq / sum(y$Freq) * 100
# Show the 15 most frequent lineages. head() does not pad with NA rows when
# fewer than 15 lineages are present, unlike y[1:15, ].
head(y, 15)
##          Var1  Freq percentage
## 75  B.1.617.2 18764 42.1530305
## 74  B.1.617.1  4478 10.0597565
## 4        AY.4  3428  7.7009480
## 34    B.1.1.7  3213  7.2179539
## 7         B.1  2640  5.9307184
## 91      AY.12  1016  2.2824280
## 3       AY.23   949  2.1319136
## 54  B.1.36.29   660  1.4826796
## 8       B.1.1   508  1.1412140
## 47     B.1.36   488  1.0962843
## 219    AY.102   440  0.9884531
## 255     AY.43   404  0.9075796
## 146     AY.16   401  0.9008402
## 205     AY.26   361  0.8109808
## 241    AY.127   343  0.7705441
#write.csv(y, file= "SARS-CoV2 strains_Jan_Sept_2021_India.csv")

# Add one indicator column per lineage of interest: a factor that is 1 when
# the row's Lineage equals that lineage and 0 otherwise. The columns are
# appended in the order listed here. (B.1 used to be assigned twice; once is
# enough.)
tracked_lineages <- c(
  "B.1.617.2", "B.1", "B.1.617.1", "AY.4", "B.1.1.7", "AY.12",
  "AY.23", "AY.33", "B.1.36.29", "B.1.1", "B.1.36"
)
gisaid_hcov.all[tracked_lineages] <- lapply(
  tracked_lineages,
  function(lineage) factor(ifelse(gisaid_hcov.all$Lineage == lineage, 1, 0))
)

# Plot lineage membership against collection month. The first and third calls
# draw the same data; the first is kept as written so its axis label does not
# change.
plot(factor(gisaid_hcov.all$B.1.617.2)~gisaid_hcov.all$Collection.month)

plot(gisaid_hcov.all$B.1.617.1 ~ gisaid_hcov.all$Collection.month)

plot(gisaid_hcov.all$B.1.617.2 ~ gisaid_hcov.all$Collection.month)

library(RColorBrewer)
# Preview all RColorBrewer palettes, then keep the 10-colour "Spectral"
# palette in my.colors.
display.brewer.all()

my.colors <- brewer.pal(n = 10, name = "Spectral")


# Percentage of each month's sequences (January through September) that carry
# a given lineage indicator.
#
# indicator: 0/1 lineage indicator (factor or numeric), one entry per row of
#            gisaid_hcov.all.
# month:     two-digit collection month ("01" ... "09") for the same rows;
#            defaults to gisaid_hcov.all$Collection.month.
# Returns a numeric vector named Jan, Feb, March, April, May, June, July,
# August, Sept. A month with no sequences gives NaN (0 / 0).
monthly_share <- function(indicator, month = gisaid_hcov.all$Collection.month) {
  month_codes <- c(
    Jan = "01", Feb = "02", March = "03", April = "04", May = "05",
    June = "06", July = "07", August = "08", Sept = "09"
  )
  vapply(
    month_codes,
    function(code) {
      in_month <- month == code
      # Summing the logical mask counts the matches directly; no need for
      # sum(ifelse(mask, 1, 0)).
      sum(indicator == 1 & in_month) / sum(in_month) * 100
    },
    numeric(1)
  )
}

# For each lineage: compute its monthly share and draw it as a bar chart,
# one colour per month.
B.1.617.2 <- monthly_share(gisaid_hcov.all$B.1.617.2)
barplot(B.1.617.2, col = my.colors[1:9])

B.1.617.1 <- monthly_share(gisaid_hcov.all$B.1.617.1)
barplot(B.1.617.1, col = my.colors[1:9])

B.1 <- monthly_share(gisaid_hcov.all$B.1)
barplot(B.1, col = my.colors[1:9])

B.1.36 <- monthly_share(gisaid_hcov.all$B.1.36)
barplot(B.1.36, col = my.colors[1:9])

B.1.1.7 <- monthly_share(gisaid_hcov.all$B.1.1.7)
barplot(B.1.1.7, col = my.colors[1:9])

#B.1.36.29

B.1.36.29 <- monthly_share(gisaid_hcov.all$B.1.36.29)
barplot(B.1.36.29, col = my.colors[1:9])

AY.4 <- monthly_share(gisaid_hcov.all$AY.4)
barplot(AY.4, col = my.colors[1:9])

AY.12 <- monthly_share(gisaid_hcov.all$AY.12)
barplot(AY.12, col = my.colors[1:9])

colnames(gisaid_hcov.all)
##  [1] "Virus.name"                      "Accession.ID"                   
##  [3] "Collection.date"                 "Location"                       
##  [5] "Host"                            "Additional.location.information"
##  [7] "Sampling.strategy"               "Gender"                         
##  [9] "Patient.age"                     "Patient.status"                 
## [11] "Last.vaccinated"                 "Passage"                        
## [13] "Specimen"                        "Additional.host.information"    
## [15] "Lineage"                         "Clade"                          
## [17] "AA.Substitutions"                "Collection.month"               
## [19] "B.1.617.2"                       "B.1"                            
## [21] "B.1.617.1"                       "AY.4"                           
## [23] "B.1.1.7"                         "AY.12"                          
## [25] "AY.23"                           "AY.33"                          
## [27] "B.1.36.29"                       "B.1.1"                          
## [29] "B.1.36"
AY.23 <- monthly_share(gisaid_hcov.all$AY.23)
barplot(AY.23, col = my.colors[1:9])

AY.33 <- monthly_share(gisaid_hcov.all$AY.33)
barplot(AY.33, col = my.colors[1:9])

B.1.1 <- monthly_share(gisaid_hcov.all$B.1.1)
barplot(B.1.1, col = my.colors[1:9])

# B.1.36 was already computed above and nothing has changed since, so only
# the bar chart is drawn again here.
barplot(B.1.36, col = my.colors[1:9])

# Print the full lineage frequency table (count and percentage per lineage).
y
##           Var1  Freq   percentage
## 75   B.1.617.2 18764 42.153030507
## 74   B.1.617.1  4478 10.059756481
## 4         AY.4  3428  7.700948016
## 34     B.1.1.7  3213  7.217953902
## 7          B.1  2640  5.930718426
## 91       AY.12  1016  2.282428000
## 3        AY.23   949  2.131913555
## 54   B.1.36.29   660  1.482679606
## 8        B.1.1   508  1.141214000
## 47      B.1.36   488  1.096284315
## 219     AY.102   440  0.988453071
## 255      AY.43   404  0.907579638
## 146      AY.16   401  0.900840185
## 205      AY.26   361  0.810980815
## 241     AY.127   343  0.770544098
## 20   B.1.1.306   293  0.658219886
## 17   B.1.1.216   278  0.624522622
## 92       AY.20   267  0.599811295
## 76   B.1.617.3   236  0.530170284
## 45     B.1.333   211  0.474008177
## 132    B.1.525   209  0.469515209
## 239     AY.125   208  0.467268724
## 236     AY.122   205  0.460529272
## 276      AY.61   197  0.442557398
## 220     AY.103   177  0.397627713
## 115    B.1.351   176  0.395381228
## 77     B.1.618   164  0.368423417
## 234     AY.120   159  0.357190996
## 5         AY.5   153  0.343712091
## 6            B   141  0.316754280
## 256      AY.44   132  0.296535921
## 149      AY.25   130  0.292042953
## 187     AY.7.1   124  0.278564047
## 43     B.1.243   122  0.274071079
## 33   B.1.1.526   118  0.265085142
## 267      AY.50   106  0.238127331
## 223     AY.106   104  0.233634362
## 56    B.1.36.8   100  0.224648425
## 225     AY.108    81  0.181965224
## 59     B.1.456    80  0.179718740
## 237     AY.123    79  0.177472256
## 90       AY.10    69  0.155007413
## 70     B.1.560    68  0.152760929
## 147      AY.19    64  0.143774992
## 184      AY.15    58  0.130296087
## 26   B.1.1.354    57  0.128049602
## 148      AY.24    55  0.123556634
## 60     B.1.459    53  0.119063665
## 107    B.1.153    51  0.114570697
## 180       AY.1    51  0.114570697
## 67     B.1.538    43  0.096598823
## 65     B.1.533    40  0.089859370
## 271      AY.55    38  0.085366402
## 53   B.1.36.22    35  0.078626949
## 71     B.1.575    35  0.078626949
## 139    B.1.628    34  0.076380465
## 240     AY.126    33  0.074133980
## 103    B.1.110    32  0.071887496
## 284      AY.76    30  0.067394528
## 1            A    29  0.065148043
## 24   B.1.1.326    29  0.065148043
## 49   B.1.36.17    29  0.065148043
## 116  B.1.351.3    28  0.062901559
## 248      AY.37    26  0.058408591
## 186      AY.21    25  0.056162106
## 206      AY.27    25  0.056162106
## 245      AY.33    24  0.053915622
## 257      AY.45    24  0.053915622
## 183      AY.14    23  0.051669138
## 249      AY.39    23  0.051669138
## 69     B.1.551    22  0.049422654
## 209      AY.32    22  0.049422654
## 272      AY.56    21  0.047176169
## 224     AY.107    20  0.044929685
## 263      AY.48    20  0.044929685
## 198      AY.22    18  0.040436717
## 48   B.1.36.10    17  0.038190232
## 286      AY.78    17  0.038190232
## 292      AY.86    17  0.038190232
## 50   B.1.36.18    16  0.035943748
## 172    B.1.633    16  0.035943748
## 231     AY.117    16  0.035943748
## 66     B.1.537    15  0.033697264
## 73     B.1.609    15  0.033697264
## 258      AY.46    15  0.033697264
## 226     AY.111    14  0.031450780
## 235     AY.121    14  0.031450780
## 283      AY.75    14  0.031450780
## 2       A.23.1    13  0.029204295
## 68     B.1.540    13  0.029204295
## 99   B.1.1.318    13  0.029204295
## 182      AY.13    13  0.029204295
## 213    B.1.623    13  0.029204295
## 290      AY.84    13  0.029204295
## 93      AY.7.2    12  0.026957811
## 42       B.1.2    11  0.024711327
## 171    B.1.627    11  0.024711327
## 199       AY.6    11  0.024711327
## 297      AY.92    11  0.024711327
## 55   B.1.36.35    10  0.022464843
## 142        Q.1    10  0.022464843
## 232     AY.118    10  0.022464843
## 254      AY.42    10  0.022464843
## 281      AY.70    10  0.022464843
## 302    AY.98.1    10  0.022464843
## 37     B.1.177     9  0.020218358
## 185      AY.17     9  0.020218358
## 274      AY.59     9  0.020218358
## 301      AY.98     9  0.020218358
## 12   B.1.1.174     8  0.017971874
## 88        A.29     8  0.017971874
## 188       AY.9     8  0.017971874
## 218     AY.100     8  0.017971874
## 262      AY.47     8  0.017971874
## 289      AY.83     8  0.017971874
## 295      AY.89     8  0.017971874
## 61     B.1.468     7  0.015725390
## 80         B.6     7  0.015725390
## 82        B.60     7  0.015725390
## 181      AY.11     7  0.015725390
## 278      AY.63     7  0.015725390
## 293      AY.87     7  0.015725390
## 113    B.1.222     6  0.013478906
## 140       C.36     6  0.013478906
## 150  B.1.1.189     6  0.013478906
## 208       AY.3     6  0.013478906
## 277      AY.62     6  0.013478906
## 279      AY.64     6  0.013478906
## 285      AY.77     6  0.013478906
## 30    B.1.1.44     5  0.011232421
## 40     B.1.184     5  0.011232421
## 120  B.1.36.33     5  0.011232421
## 154  B.1.1.528     5  0.011232421
## 210       AY.7     5  0.011232421
## 250    AY.39.1     5  0.011232421
## 266     AY.5.4     5  0.011232421
## 269      AY.53     5  0.011232421
## 294      AY.88     5  0.011232421
## 10   B.1.1.101     4  0.008985937
## 51   B.1.36.19     4  0.008985937
## 83         C.1     4  0.008985937
## 119  B.1.36.31     4  0.008985937
## 165    B.1.465     4  0.008985937
## 170    B.1.596     4  0.008985937
## 214      AY.29     4  0.008985937
## 233     AY.119     4  0.008985937
## 246      AY.35     4  0.008985937
## 251   AY.4.2.3     4  0.008985937
## 252     AY.4.4     4  0.008985937
## 259    AY.46.2     4  0.008985937
## 260    AY.46.4     4  0.008985937
## 282      AY.73     4  0.008985937
## 287      AY.79     4  0.008985937
## 291      AY.85     4  0.008985937
## 18    B.1.1.25     3  0.006739453
## 101  B.1.1.419     3  0.006739453
## 108    B.1.160     3  0.006739453
## 110    B.1.210     3  0.006739453
## 112  B.1.214.2     3  0.006739453
## 121   B.1.36.9     3  0.006739453
## 124    B.1.398     3  0.006739453
## 137    B.1.582     3  0.006739453
## 144       A.27     3  0.006739453
## 174     C.36.3     3  0.006739453
## 176        P.1     3  0.006739453
## 177        Q.6     3  0.006739453
## 178        Q.8     3  0.006739453
## 207      AY.28     3  0.006739453
## 227     AY.112     3  0.006739453
## 229     AY.114     3  0.006739453
## 243      AY.18     3  0.006739453
## 253     AY.4.6     3  0.006739453
## 265     AY.5.3     3  0.006739453
## 268      AY.51     3  0.006739453
## 270      AY.54     3  0.006739453
## 273      AY.58     3  0.006739453
## 298      AY.93     3  0.006739453
## 309     AY.9.2     3  0.006739453
## 310      AY.90     3  0.006739453
## 311      AY.99     3  0.006739453
## 9     B.1.1.10     2  0.004492969
## 13   B.1.1.194     2  0.004492969
## 15   B.1.1.200     2  0.004492969
## 21   B.1.1.307     2  0.004492969
## 25   B.1.1.353     2  0.004492969
## 27   B.1.1.355     2  0.004492969
## 32    B.1.1.46     2  0.004492969
## 39   B.1.177.7     2  0.004492969
## 44  B.1.258.20     2  0.004492969
## 62     B.1.470     2  0.004492969
## 63     B.1.476     2  0.004492969
## 64     B.1.524     2  0.004492969
## 72     B.1.602     2  0.004492969
## 78      B.1.94     2  0.004492969
## 81       B.6.6     2  0.004492969
## 84         L.3     2  0.004492969
## 86         R.1     2  0.004492969
## 97   B.1.1.274     2  0.004492969
## 111    B.1.214     2  0.004492969
## 117  B.1.36.16     2  0.004492969
## 118  B.1.36.24     2  0.004492969
## 128  B.1.466.1     2  0.004492969
## 129    B.1.480     2  0.004492969
## 133    B.1.564     2  0.004492969
## 134    B.1.566     2  0.004492969
## 145       AE.2     2  0.004492969
## 152   B.1.1.37     2  0.004492969
## 157    B.1.195     2  0.004492969
## 159  B.1.351.2     2  0.004492969
## 166  B.1.466.2     2  0.004492969
## 191    B.1.305     2  0.004492969
## 192    B.1.362     2  0.004492969
## 195       C.38     2  0.004492969
## 200    B.1.1.1     2  0.004492969
## 221     AY.104     2  0.004492969
## 222     AY.105     2  0.004492969
## 228     AY.113     2  0.004492969
## 238     AY.124     2  0.004492969
## 242    AY.16.1     2  0.004492969
## 288      AY.80     2  0.004492969
## 303     AY.101     2  0.004492969
## 307    AY.46.1     2  0.004492969
## 312    AY.99.2     2  0.004492969
## 11   B.1.1.141     1  0.002246484
## 14   B.1.1.196     1  0.002246484
## 16   B.1.1.214     1  0.002246484
## 19   B.1.1.254     1  0.002246484
## 22   B.1.1.311     1  0.002246484
## 23   B.1.1.317     1  0.002246484
## 28   B.1.1.364     1  0.002246484
## 29   B.1.1.416     1  0.002246484
## 31   B.1.1.452     1  0.002246484
## 35    B.1.1.97     1  0.002246484
## 36     B.1.170     1  0.002246484
## 38   B.1.177.4     1  0.002246484
## 41     B.1.189     1  0.002246484
## 46     B.1.349     1  0.002246484
## 52   B.1.36.21     1  0.002246484
## 57     B.1.438     1  0.002246484
## 58     B.1.453     1  0.002246484
## 79        B.10     1  0.002246484
## 87        A.21     1  0.002246484
## 89        AE.7     1  0.002246484
## 94   B.1.1.135     1  0.002246484
## 95    B.1.1.17     1  0.002246484
## 96   B.1.1.201     1  0.002246484
## 98    B.1.1.28     1  0.002246484
## 100  B.1.1.351     1  0.002246484
## 102   B.1.1.57     1  0.002246484
## 104    B.1.111     1  0.002246484
## 105     B.1.12     1  0.002246484
## 106    B.1.145     1  0.002246484
## 109    B.1.164     1  0.002246484
## 114    B.1.289     1  0.002246484
## 122    B.1.371     1  0.002246484
## 123    B.1.383     1  0.002246484
## 125    B.1.442     1  0.002246484
## 126    B.1.460     1  0.002246484
## 127    B.1.466     1  0.002246484
## 130    B.1.520     1  0.002246484
## 131    B.1.523     1  0.002246484
## 135    B.1.569     1  0.002246484
## 136    B.1.576     1  0.002246484
## 138    B.1.617     1  0.002246484
## 141        P.2     1  0.002246484
## 143        Q.4     1  0.002246484
## 151  B.1.1.365     1  0.002246484
## 153  B.1.1.525     1  0.002246484
## 155    B.1.143     1  0.002246484
## 156    B.1.159     1  0.002246484
## 158    B.1.229     1  0.002246484
## 160    B.1.382     1  0.002246484
## 161    B.1.397     1  0.002246484
## 162  B.1.400.1     1  0.002246484
## 163    B.1.413     1  0.002246484
## 164    B.1.429     1  0.002246484
## 167    B.1.509     1  0.002246484
## 168    B.1.526     1  0.002246484
## 169  B.1.575.2     1  0.002246484
## 173    B.1.636     1  0.002246484
## 175       N.10     1  0.002246484
## 179       AE.4     1  0.002246484
## 189  B.1.1.121     1  0.002246484
## 190  B.1.214.3     1  0.002246484
## 193    B.1.441     1  0.002246484
## 194       C.37     1  0.002246484
## 196        A.5     1  0.002246484
## 197       AE.3     1  0.002246484
## 201  B.1.1.372     1  0.002246484
## 202    B.1.1.8     1  0.002246484
## 203    B.1.302     1  0.002246484
## 204    B.1.577     1  0.002246484
## 211    B.1.281     1  0.002246484
## 212   B.1.36.7     1  0.002246484
## 215      AY.30     1  0.002246484
## 216    B.1.395     1  0.002246484
## 217       B.49     1  0.002246484
## 230     AY.115     1  0.002246484
## 244      AY.31     1  0.002246484
## 247      AY.36     1  0.002246484
## 261    AY.46.6     1  0.002246484
## 264      AY.49     1  0.002246484
## 275      AY.60     1  0.002246484
## 280      AY.65     1  0.002246484
## 296      AY.91     1  0.002246484
## 299      AY.95     1  0.002246484
## 300      AY.96     1  0.002246484
## 304   AY.119.1     1  0.002246484
## 305   AY.120.1     1  0.002246484
## 306 AY.120.2.1     1  0.002246484
## 308      AY.71     1  0.002246484
colnames(gisaid_hcov.all)
##  [1] "Virus.name"                      "Accession.ID"                   
##  [3] "Collection.date"                 "Location"                       
##  [5] "Host"                            "Additional.location.information"
##  [7] "Sampling.strategy"               "Gender"                         
##  [9] "Patient.age"                     "Patient.status"                 
## [11] "Last.vaccinated"                 "Passage"                        
## [13] "Specimen"                        "Additional.host.information"    
## [15] "Lineage"                         "Clade"                          
## [17] "AA.Substitutions"                "Collection.month"               
## [19] "B.1.617.2"                       "B.1"                            
## [21] "B.1.617.1"                       "AY.4"                           
## [23] "B.1.1.7"                         "AY.12"                          
## [25] "AY.23"                           "AY.33"                          
## [27] "B.1.36.29"                       "B.1.1"                          
## [29] "B.1.36"
par(mfrow = c(1, 1))

# Bind ten per-lineage vectors (built earlier in the script; presumably the
# monthly percentage share of each lineage -- confirm) into one data frame,
# one column per lineage.
Lineage <- data.frame(
  B.1.617.2, B.1.617.1, AY.4, B.1.1.7, B.1,
  AY.12, AY.23, B.1.36.29, B.1.1, B.1.36
)
colnames(Lineage)
##  [1] "B.1.617.2" "B.1.617.1" "AY.4"      "B.1.1.7"   "B.1"       "AY.12"    
##  [7] "AY.23"     "B.1.36.29" "B.1.1"     "B.1.36"
# Rounded copy, only needed for the optional CSV export below.
Lineage_r <- round(Lineage, digits = 2)
#write.csv(Lineage_r, "Lineage distribution_JantoSept_India_2021.csv")

# Draw the first lineage in black. The y-axis is sized from ALL columns so
# that none of the curves added afterwards can fall outside the plot region.
plot(
  Lineage$B.1.617.2,
  type = "b", xlab = "Month 2021", ylab = "Percentage(%)", col = "black",
  xlim = c(1, 9), ylim = range(unlist(Lineage), na.rm = TRUE),
  lwd = 2, xaxt = "n"
)
axis(1, at = seq(1, 9, by = 1), las = 2, labels = rownames(Lineage))

# Overlay the remaining columns in column order; column k + 1 is drawn with
# my.colors[k], which is the pairing the legend below relies on.
invisible(lapply(
  seq_len(ncol(Lineage) - 1),
  function(k) {
    lines(Lineage[[k + 1]], type = "b", lwd = 2, col = my.colors[k])
  }
))

legend(
  "topleft",
  legend = colnames(Lineage),
  col = c("black", my.colors[seq_len(ncol(Lineage) - 1)]),
  lty = 1, title = "Lineage", cex = 0.75
)

# 2 x 3 grid of bar charts, one panel per lineage column of `Lineage`.
par(mfrow = c(2, 3))

# First vector: column to plot; second vector: panel title for that column.
# Every panel shares the same colours, bar labels and 0-80 y-range.
invisible(Map(
  function(lineage_col, panel_title) {
    barplot(
      Lineage[[lineage_col]],
      col = my.colors[1:9],
      names.arg = rownames(Lineage),
      main = panel_title,
      las = 2,
      ylim = c(0, 80)
    )
  },
  c("B.1.617.1", "B.1", "B.1.1.7", "B.1.617.2", "AY.4", "AY.12"),
  c("B.1.617.1", "B.1", "B.1.1.7", "B.1.617.2", "AY.4", "AY.12 ")
))


# Pairwise correlation between the lineage columns (cor() defaults).
cor(Lineage)
##            B.1.617.2   B.1.617.1       AY.4    B.1.1.7        B.1      AY.12
## B.1.617.2  1.0000000 -0.55150574  0.7627107 -0.6441639 -0.7945639  0.7597749
## B.1.617.1 -0.5515057  1.00000000 -0.3813487  0.9759761  0.3860769 -0.2298838
## AY.4       0.7627107 -0.38134870  1.0000000 -0.4244577 -0.3033932  0.9062736
## B.1.1.7   -0.6441639  0.97597606 -0.4244577  1.0000000  0.4314783 -0.3242978
## B.1       -0.7945639  0.38607695 -0.3033932  0.4314783  1.0000000 -0.2892071
## AY.12      0.7597749 -0.22988376  0.9062736 -0.3242978 -0.2892071  1.0000000
## AY.23      0.7126003 -0.15037961  0.8705798 -0.2531271 -0.2414937  0.9590670
## B.1.36.29 -0.7923956  0.05810885 -0.4418485  0.1717619  0.8316041 -0.4952632
## B.1.1     -0.8049458  0.13364746 -0.4308610  0.2384594  0.8519277 -0.4697462
## B.1.36    -0.8011952  0.06631746 -0.4381744  0.1674751  0.8722061 -0.4939294
##                AY.23   B.1.36.29      B.1.1      B.1.36
## B.1.617.2  0.7126003 -0.79239563 -0.8049458 -0.80119523
## B.1.617.1 -0.1503796  0.05810885  0.1336475  0.06631746
## AY.4       0.8705798 -0.44184846 -0.4308610 -0.43817436
## B.1.1.7   -0.2531271  0.17176195  0.2384594  0.16747515
## B.1       -0.2414937  0.83160413  0.8519277  0.87220607
## AY.12      0.9590670 -0.49526324 -0.4697462 -0.49392941
## AY.23      1.0000000 -0.47259993 -0.4360809 -0.47210243
## B.1.36.29 -0.4725999  1.00000000  0.9941551  0.99270704
## B.1.1     -0.4360809  0.99415508  1.0000000  0.98535909
## B.1.36    -0.4721024  0.99270704  0.9853591  1.00000000
library(ggplot2)

# Correlation heat map with the lineages in their original column order.
ggcorrplot::ggcorrplot(cor(Lineage))

# Back to a single-panel layout for the base-graphics plots that follow.
par(mfrow = c(1, 1))

# Same heat map with hc.order switched on and white tile outlines. The
# argument is spelled out in full (`outline.color`) rather than relying on
# partial matching of `outline.col`.
ggcorrplot::ggcorrplot(
  cor(Lineage),
  hc.order = TRUE,
  outline.color = "white"
)

# Number of rows in the combined table, i.e. samples analysed.
total.samples <- nrow(gisaid_hcov.all)

# Rows per collection month: codes "01".."09" are counted one at a time and
# the result keeps the month labels as element names.
total.samples_mw <- vapply(
  c(
    Jan = "01", Feb = "02", March = "03", April = "04", May = "05",
    June = "06", July = "07", August = "08", Sept = "09"
  ),
  function(month_code) sum(gisaid_hcov.all$Collection.month == month_code),
  integer(1)
)

# The monthly totals are what the plots below label as "cases".
covid.cases <- total.samples_mw

# Pair the monthly totals with each lineage's monthly count, obtained as
# percentage * total / 100.
covid.cases_B.1.617.2 <- setNames(
  cbind.data.frame(covid.cases, B.1.617.2 * total.samples_mw / 100),
  c("Reported_Cases", "Reported_B.1.617.2")
)
covid.cases_AY.4 <- setNames(
  cbind.data.frame(covid.cases, AY.4 * total.samples_mw / 100),
  c("Reported_Cases", "Reported_AY.4")
)

# Ratio of the two column maxima; used later to put both series on one axis.
coeff <- with(
  covid.cases_B.1.617.2,
  max(Reported_Cases) / max(Reported_B.1.617.2)
)

# Base-graphics view of the monthly totals with month names on the x-axis.
plot(
  covid.cases_B.1.617.2[["Reported_Cases"]],
  type = "b",
  xlab = "Month of 2021",
  ylab = "cases",
  col = "black",
  xlim = c(1, 9),
  xaxt = "n"
)
axis(
  side = 1,
  at = 1:9,
  labels = rownames(covid.cases_B.1.617.2),
  las = 2
)

library(ggplot2)

# Dual-axis comparison of the monthly totals (blue, left axis) with one
# lineage's monthly counts (red, right axis), both drawn as loess smooths.
#
# Arguments:
#   cases_df      data frame with a "Reported_Cases" column and the column
#                 named by `lineage_col`; its row names label the x-axis
#                 (first three characters of each).
#   lineage_col   name of the column holding the lineage counts.
#   coeff         multiplier that rescales the lineage counts onto the left
#                 axis; the right axis divides by it again.
#   lineage_label lineage name inserted into the right-hand axis title.
#
# Returns the ggplot object; calling the function at top level prints it.
plot_cases_vs_lineage <- function(cases_df, lineage_col, coeff,
                                  lineage_label) {
  # One x position per row instead of a hard-coded 1:9.
  month_index <- seq_len(nrow(cases_df))

  ggplot(data = cases_df, aes(x = month_index)) +
    geom_smooth(aes(y = Reported_Cases), col = "blue", method = "loess") +
    geom_smooth(
      aes(y = .data[[lineage_col]] * coeff),
      col = "red", method = "loess"
    ) +
    scale_y_continuous(
      # Features of the first axis
      name = "COVID-19 cases/month",
      # Second axis: undo the rescaling so it reads in lineage counts
      sec.axis = sec_axis(
        ~ . / coeff,
        name = paste0("Reported ", lineage_label, "/month")
      )
    ) +
    theme(
      axis.title.y.left = element_text(color = "blue"),
      axis.text.y.left = element_text(color = "blue"),
      axis.title.y.right = element_text(color = "red"),
      axis.text.y.right = element_text(color = "red"),
      axis.text.x.top = element_text()
    ) +
    scale_x_continuous(
      name = "Month of 2021",
      breaks = month_index,
      labels = substr(rownames(cases_df), 1, 3)
    )
}

# B.1.617.2 -- covid.cases_B.1.617.2 and coeff were computed just above.
plot_cases_vs_lineage(
  covid.cases_B.1.617.2, "Reported_B.1.617.2", coeff, "B.1.617.2"
)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

#AY.4
covid.cases_AY.4 <- cbind.data.frame(covid.cases, AY.4 * total.samples_mw / 100)
colnames(covid.cases_AY.4) <- c("Reported_Cases", "Reported_AY.4")
coeff <- max(covid.cases_AY.4$Reported_Cases) /
  max(covid.cases_AY.4$Reported_AY.4)

plot_cases_vs_lineage(covid.cases_AY.4, "Reported_AY.4", coeff, "AY.4")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

#B.1.1
covid.cases_B.1.1 <- cbind.data.frame(
  covid.cases, B.1.1 * total.samples_mw / 100
)
colnames(covid.cases_B.1.1) <- c("Reported_Cases", "Reported_B.1.1")
coeff <- max(covid.cases_B.1.1$Reported_Cases) /
  max(covid.cases_B.1.1$Reported_B.1.1)

plot_cases_vs_lineage(covid.cases_B.1.1, "Reported_B.1.1", coeff, "B.1.1")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

#B.1.617.1
covid.cases_B.1.617.1 <- cbind.data.frame(
  covid.cases, B.1.617.1 * total.samples_mw / 100
)
colnames(covid.cases_B.1.617.1) <- c("Reported_Cases", "Reported_B.1.617.1")
coeff <- max(covid.cases_B.1.617.1$Reported_Cases) /
  max(covid.cases_B.1.617.1$Reported_B.1.617.1)

plot_cases_vs_lineage(
  covid.cases_B.1.617.1, "Reported_B.1.617.1", coeff, "B.1.617.1"
)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

head(y)
##         Var1  Freq percentage
## 75 B.1.617.2 18764  42.153031
## 74 B.1.617.1  4478  10.059756
## 4       AY.4  3428   7.700948
## 34   B.1.1.7  3213   7.217954
## 7        B.1  2640   5.930718
## 91     AY.12  1016   2.282428
# Monthly share (%) of all AY sublineages among the samples collected in that
# month. Membership is taken from the lineage name itself (anything starting
# with "AY."): gisaid_hcov.all has no `Ay.x` indicator column, and the
# B.1.36.29 indicator describes a different lineage, so neither can be used.
Ay.x <- local({
  is_ay <- grepl("^AY\\.", gisaid_hcov.all$Lineage)
  vapply(
    c(
      Jan = "01", Feb = "02", March = "03", April = "04", May = "05",
      June = "06", July = "07", August = "08", Sept = "09"
    ),
    function(month_code) {
      in_month <- gisaid_hcov.all$Collection.month == month_code
      sum(is_ay & in_month) / sum(in_month) * 100
    },
    numeric(1)
  )
})

# Work on a copy so the lineage relabelling does not touch gisaid_hcov.all.
df <- gisaid_hcov.all
# Collapse every AY sublineage (AY.4, AY.12, ...) into the single label
# "AY.x". The pattern is anchored and the dot escaped so that only names that
# start with the literal prefix "AY." are rewritten.
df$Lineage <- gsub("^AY\\..*", "AY.x", df$Lineage)
# Frequency table of the relabelled lineages (columns Var1 and Freq).
y2 <- data.frame(table(df$Lineage))

# Most frequent first; drop records whose lineage is recorded as "None"
# before computing each lineage's percentage of the remaining total.
y2 <- y2[order(y2$Freq, decreasing = TRUE), ]
y2 <- y2[y2$Var1 != "None", ]
y2$percentage <- y2$Freq / sum(y2$Freq) * 100
y2[1:15, ]
##          Var1  Freq percentage
## 160 B.1.617.2 18764 42.1530305
## 11       AY.x 10589 23.7880217
## 159 B.1.617.1  4478 10.0597565
## 57    B.1.1.7  3213  7.2179539
## 13        B.1  2640  5.9307184
## 103 B.1.36.29   660  1.4826796
## 14      B.1.1   508  1.1412140
## 94     B.1.36   488  1.0962843
## 34  B.1.1.306   293  0.6582199
## 29  B.1.1.216   278  0.6245226
## 161 B.1.617.3   236  0.5301703
## 89    B.1.333   211  0.4740082
## 139   B.1.525   209  0.4695152
## 91    B.1.351   176  0.3953812
## 162   B.1.618   164  0.3684234
# 1/0 indicator (stored as a factor) for records relabelled "AY.x".
df$AY.x <- factor(ifelse(df$Lineage == "AY.x", 1, 0))

# Monthly share (%) of AY.x among all records collected in that month;
# the month labels become the element names.
AY.x <- vapply(
  c(
    Jan = "01", Feb = "02", March = "03", April = "04", May = "05",
    June = "06", July = "07", August = "08", Sept = "09"
  ),
  function(month_code) {
    in_month <- df$Collection.month == month_code
    sum(df$AY.x == 1 & in_month) / sum(in_month) * 100
  },
  numeric(1)
)

barplot(AY.x, col = my.colors[1:9], main = "AY.x")

barplot(B.1.617.2, col = my.colors[1:9], main = "B.1.617.2")

# Add the pooled AY share as an extra column next to the single lineages.
Lineage$AY.x <- AY.x

# B.1.617.2 (solid black) against AY.x (dashed blue). The y-axis spans both
# series; sizing it from B.1.617.2 alone would clip the AY.x curve whenever
# AY.x exceeds the B.1.617.2 maximum.
plot(
  Lineage$B.1.617.2,
  type = "b", xlab = "Month 2021", ylab = "Percentage(%)", col = "black",
  xlim = c(1, 9),
  ylim = range(c(Lineage$B.1.617.2, Lineage$AY.x), na.rm = TRUE),
  lwd = 2, xaxt = "n"
)
axis(1, at = seq(1, 9, by = 1), las = 2, labels = rownames(Lineage))

lines(Lineage$AY.x, type = "b", lwd = 2, lty = 2, col = "blue")
legend(
  "topleft",
  legend = c("B.1.617.2", "AY.x"),
  col = c("black", "blue"),
  lty = c(1, 2), lwd = 2
)

#AY.x
# Monthly totals next to the AY.x count for the same month
# (percentage * total / 100).
covid.cases_AY.x <- setNames(
  cbind.data.frame(covid.cases, AY.x * total.samples_mw / 100),
  c("Reported_Cases", "Reported_AY.x")
)
# Ratio of the column maxima: scales AY.x up to the height of the totals.
coeff <- with(covid.cases_AY.x, max(Reported_Cases) / max(Reported_AY.x))

# Loess smooths of both series on a shared panel: totals in blue on the left
# axis, AY.x in red, read off the right axis (which divides by coeff again).
ggplot(data = covid.cases_AY.x, mapping = aes(x = seq_len(9))) +
  geom_smooth(
    mapping = aes(y = Reported_Cases),
    colour = "blue",
    method = "loess"
  ) +
  geom_smooth(
    mapping = aes(y = Reported_AY.x * coeff),
    colour = "red",
    method = "loess"
  ) +
  scale_x_continuous(
    name = "Month of 2021",
    breaks = 1:9,
    labels = substr(rownames(covid.cases_AY.x), 1, 3)
  ) +
  scale_y_continuous(
    name = "COVID-19 cases/month",
    sec.axis = sec_axis(~ . / coeff, name = "Reported AY.x/month")
  ) +
  theme(
    axis.title.y.left = element_text(color = "blue"),
    axis.text.y.left = element_text(color = "blue"),
    axis.title.y.right = element_text(color = "red"),
    axis.text.y.right = element_text(color = "red"),
    axis.text.x.top = element_text()
  )
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'