Summary

The following datasets contain early epidemiological and genetic findings of Zika virus in the Americas on or before 2015, by state of residence and municipality of residence in Brazil. The Zika dataset contains the number of suspected Zika virus cases from January 2015 to the end of December 2015. The microcephaly dataset contains the number of suspected microcephaly cases from January 2015 to the first week of January 2016; the numbers correspond to suspected microcephaly cases at week 20 of pregnancy.


# Hard-coded location of the input CSV files, which are read below by
# relative path.
data_dir <- "E:/DataIncubatorChallenge/Zika"

# Only change directory when that location exists. An unconditional setwd()
# aborts the whole script on any machine without this path; falling back to
# the current working directory lets the script run wherever the CSV files
# sit next to it.
if (dir.exists(data_dir)) {
  setwd(data_dir)
} else {
  message(
    "Data directory '", data_dir, "' not found; ",
    "reading input files from ", getwd()
  )
}

#library("plyr")

# Load the Zika data set.
zika <- read.csv("ZikaData.csv", header = TRUE)

# Relabel column names:
# "W" are epidemiological weeks in 2015;
# epidemiological weeks before 2015 are numbered as negative weeks.
# Every "X" in a column name is replaced with "W".
colnames(zika) <- gsub("X", "W", colnames(zika))


# Load the lookup table of state and municipality codes and definitions.
zika_smcode <- read.csv("StateMunicipalityCode.csv", header = TRUE)

# Keep only the municipalities of Rio de Janeiro state.
RJ <- subset(zika_smcode, State == "RJ")

# Separate the code from the name for each municipality.
# The first whitespace-delimited token of each label is taken as the code and
# everything after it as the name.
# NOTE(review): assumes every label has the form "<code> <name>" - confirm
# against StateMunicipalityCode.csv.
#
# This is done with regular expressions instead of strsplit() + rbind():
# labels have differing word counts, so rbind() recycled tokens to pad the
# shorter rows (hence the "number of columns of result is not a multiple of
# vector length" warning), and taking fixed columns 2:5 cut off longer names.
rj_labels <- trimws(as.character(RJ$Municipality.of.Residence))
RJ <- data.frame(
  code = sub("\\s.*$", "", rj_labels),
  name = sub("^\\S+\\s*", "", rj_labels),
  stringsAsFactors = FALSE
)

# Columns of `zika` that are summed as the suspected-case counts.
week_cols <- 2:50

# Number of suspected Zika virus cases reported in Brazil in 2015.
# Computed once and reused as the denominator of both proportions.
n_zikaBrazil <- sum(zika[, week_cols])

# Filter the Zika data by the Rio de Janeiro Olympics 2016 venues, i.e. the
# municipality whose code sits in row 13 of RJ.
# NOTE(review): row 13 is presumably Rio de Janeiro municipality - confirm
# against the RJ table; the index breaks if the row order ever changes.
zika_olympics <- subset(zika, Municipality == RJ[13, ]$code)

# Number of suspected Zika virus cases reported in Rio de Janeiro
# municipality in 2015.
n_zikaolympics <- sum(zika_olympics[, week_cols])

# Proportion of suspected Zika virus cases reported in Rio de Janeiro
# municipality in 2015.
p_zikaolympics <- n_zikaolympics / n_zikaBrazil

# Filter the Zika data by all RJ municipality codes.
# A single vectorised membership test replaces growing a data frame with
# rbind() inside a loop; it also cannot count a row twice if a code is
# repeated in RJ.
zika_RJ <- zika[zika$Municipality %in% RJ$code, ]

# Number of suspected Zika virus cases reported in Rio de Janeiro state
# in 2015.
n_zikaRJ <- sum(zika_RJ[, week_cols])

# Proportion of suspected Zika virus cases reported in the state of
# Rio de Janeiro in 2015.
p_zikaRJ <- n_zikaRJ / n_zikaBrazil

# Bar names and title shared by both plots.
bar_names <- c("Olympics venues in RJ", "RJ state", "Brazil")
plot_title <- " Zika Virus in 2016 Olympic venues vs. Rio de Janeiro (RJ) in Brazil"

# plot1 - number of suspected cases
#pdf("plot1a.pdf")
case_counts <- c(n_zikaolympics, n_zikaRJ, n_zikaBrazil)
# barplot() returns the x coordinates of the bar midpoints; using them keeps
# the value labels centred on the bars (the midpoints are not 1, 2, 3).
bar_mid <- barplot(
  case_counts,
  names.arg = bar_names,
  ylab = "Number of suspected cases",
  main = plot_title
)
# Labels sit 1000 above the two small bars and 1000 below the top of the
# Brazil bar.
text(x = bar_mid, y = case_counts + c(1000, 1000, -1000), labels = case_counts)

# plot2 - proportion of suspected cases
#png(filename = "plot2.png", width = 600, height = 600, units = "px")
case_props <- c(p_zikaolympics, p_zikaRJ, 1)
bar_mid <- barplot(
  case_props,
  names.arg = bar_names,
  ylab = "Proporton of suspected cases",
  main = plot_title
)
text(
  x = bar_mid,
  y = c(0.05, 0.25, 0.95),
  labels = c(round(p_zikaolympics, 3), round(p_zikaRJ, 3), "1")
)

# Close the current graphics device; this is what finishes the output file
# when the png() call above is enabled.
# NOTE(review): there is only one dev.off(), so if pdf() is also enabled its
# device stays open - add a dev.off() after plot1 in that case.
dev.off()
## null device 
##           1

Conclusion

The proportion of Brazil's suspected Zika virus cases in 2015 that were reported in the Olympic venues municipality (Rio de Janeiro) is 0.0024292318, while the proportion reported in the state of Rio de Janeiro is 0.0139006039.