library(dplyr)
library(data.table)
library(ggplot2)
library(RColorBrewer)
library(rworldmap)
# Make the project folder the working directory (the relative path
# 'cdc_zika.csv' used below is resolved against it), then list its contents.
setwd("C:/Data Science Fundation with R/Kraggle/zika-virus-epidemic")
list.files()
 [1] "cdc_zika.csv"                                            "examples"                                               
 [3] "Rplot.png"                                               "Rplot01.png"                                            
 [5] "Rplot02.png"                                             "Rplot03.png"                                            
 [7] "Rplot04.png"                                             "Rplot05.png"                                            
 [9] "Rplot06.png"                                             "Rplot07.png"                                            
[11] "Rplot08.png"                                             "Rplot09.png"                                            
[13] "Rplot10.png"                                             "Rplot11.png"                                            
[15] "Rplot12.png"                                             "Rplot13.png"                                            
[17] "Rplot14.png"                                             "rsconnect"                                              
[19] "Zika-ZF Yi.r"                                            "Zika-ZF Yi.Rmd"                                         
[21] "Zika Virus Data Visulization_ R Notebook -ZF Yi.nb.html" "Zika Virus Data Visulization_ R Notebook -ZF Yi.Rmd"    
# Read the CDC Zika reports and convert them to a data.table.
zika <- read.csv("cdc_zika.csv", header = TRUE, fill = TRUE, row.names = NULL)
zika <- as.data.table(zika)

# `location` is "Country-Province[-...]"; some locations have up to four
# dash-separated parts. Only the first two are wanted, so `keep = 1:2` drops
# the rest explicitly instead of triggering the "Supplied 2 columns to be
# assigned a list (length 4)" warning. Locations without a dash get an NA
# Province. The trailing `[]` prints the updated table.
zika[
  ,
  c("Country", "Province") :=
    tstrsplit(location, "-", fixed = TRUE, keep = 1:2)
][]
Warning in `[.data.table`(zika, , `:=`(c("Country", "Province"), tstrsplit(location,  :
  Supplied 2 columns to be assigned a list (length 4) of values (2 unused)
        report_date                     location location_type                          data_field data_field_code
     1:   3/19/2016       Argentina-Buenos_Aires      province    cumulative_confirmed_local_cases          AR0001
     2:   3/19/2016       Argentina-Buenos_Aires      province     cumulative_probable_local_cases          AR0002
     3:   3/19/2016       Argentina-Buenos_Aires      province cumulative_confirmed_imported_cases          AR0003
     4:   3/19/2016       Argentina-Buenos_Aires      province  cumulative_probable_imported_cases          AR0004
     5:   3/19/2016       Argentina-Buenos_Aires      province        cumulative_cases_under_study          AR0005
    ---                                                                                                           
107615:   6/28/2016 United_States_Virgin_Islands     territory            confirmed_conjunctivitis          VI0017
107616:   6/28/2016 United_States_Virgin_Islands     territory                   confirmed_eyepain          VI0018
107617:   6/28/2016 United_States_Virgin_Islands     territory                  confirmed_headache          VI0019
107618:   6/28/2016 United_States_Virgin_Islands     territory                   confirmed_malaise          VI0020
107619:   6/28/2016 United_States_Virgin_Islands     territory                    zika_no_specimen          VI0021
        time_period time_period_type value  unit                      Country     Province
     1:          NA               NA     0 cases                    Argentina Buenos_Aires
     2:          NA               NA     0 cases                    Argentina Buenos_Aires
     3:          NA               NA     2 cases                    Argentina Buenos_Aires
     4:          NA               NA     1 cases                    Argentina Buenos_Aires
     5:          NA               NA   127 cases                    Argentina Buenos_Aires
    ---                                                                                   
107615:          NA               NA     7 cases United_States_Virgin_Islands           NA
107616:          NA               NA    13 cases United_States_Virgin_Islands           NA
107617:          NA               NA    14 cases United_States_Virgin_Islands           NA
107618:          NA               NA     5 cases United_States_Virgin_Islands           NA
107619:          NA               NA     2 cases United_States_Virgin_Islands           NA
# Parse the month/day/year report dates and derive the reporting year.
zika$report_date <- as.Date(zika$report_date, format = "%m/%d/%Y")
zika$Year <- as.numeric(format(zika$report_date, format = "%Y"))

# Count the observations (rows) per country and year. Rows without a Year
# (missing or unparseable date) are excluded. The count column is created
# directly as `cases`, the name later steps refer to, and the result is
# ungrouped so no grouping leaks into downstream operations.
ZikaOc <- zika %>%
  filter(!is.na(Year)) %>%
  group_by(Country, Year) %>%
  summarise(cases = n()) %>%
  ungroup()

# Horizontal bar chart of the counts per country, one panel per year.
ggplot(ZikaOc, aes(x = Country, y = cases)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  facet_wrap(~ Year)

The globally reported Zika data, 107,619 observations in total, came mainly from 12 countries: Argentina (2,091 observations), Brazil (4,253), Colombia (86,889), Dominican Republic (5,716), Ecuador (796), El Salvador (1,000), Guatemala (516), Haiti (52), Mexico (2,894), Nicaragua (125), Panama (148) and USA (2,962).

# Load the `countryExData` example data set that ships with rworldmap into
# the current environment and show its structure.
data("countryExData", package = "rworldmap", envir = environment())
str(countryExData)
'data.frame':   149 obs. of  80 variables:
 $ ISO3V10                     : chr  "AGO" "ALB" "ARE" "ARG" ...
 $ Country                     : chr  "Angola" "Albania" "United Arab Emirates            " "Argentina" ...
 $ EPI_regions                 : chr  "Sub-Saharan Africa" "Central and Eastern Europ" "Middle East and North Africa" "Latin America and Caribbe" ...
 $ GEO_subregion               : chr  "Southern Africa" "Central Europe" "Arabian Peninsula" "South America" ...
 $ Population2005              : num  15941 3130 4496 38747 3016 ...
 $ GDP_capita.MRYA             : num  2314 4955 22698 13652 5011 ...
 $ landlock                    : int  0 0 0 0 1 0 1 1 1 0 ...
 $ landarea                    : num  1251896 28346 74777 2736296 28273 ...
 $ density                     : num  0.2 34.3 8.7 1.3 30.3 0.3 16.3 14.6 91 71.2 ...
 $ EPI                         : num  39.5 84 64 81.8 77.8 79.8 89.4 72.2 54.7 78.4 ...
 $ ENVHEALTH                   : num  8.9 89.3 89.8 91.1 88 99.3 98.1 76.4 37.6 98.8 ...
 $ ECOSYSTEM                   : num  70.1 78.6 38.2 72.5 67.5 60.4 80.7 67.9 71.7 58 ...
 $ ENVHEALTH.1                 : num  8.9 89.3 89.8 91.1 88 99.3 98.1 76.4 37.6 98.8 ...
 $ AIR_E                       : num  49.2 99.1 85.1 87.3 99.4 84.9 97 97.7 99.5 50.2 ...
 $ WATER_E                     : num  61.6 96.5 27.1 74.9 28 62.5 79.9 48.5 62.8 52.3 ...
 $ BIODIVERSITY                : num  58.9 4 36.6 33.6 16 78.1 71.6 29 62.5 10 ...
 $ PRODUCTIVE_NATURAL_RESOURCES: num  81.3 79.4 74.1 71.5 82.1 91.8 88.2 85.7 48 76.1 ...
 $ CLIMATE                     : num  74.6 93.4 26.6 82.3 87.2 42.5 79.9 77.1 81.5 69.5 ...
 $ DALY_SC                     : num  0 99.5 98.9 98 98.2 99.6 99.8 93 26.1 99.6 ...
 $ WATER_H                     : num  19.8 91.3 98.8 91.3 83.3 100 100 53.6 44.7 100 ...
 $ AIR_H                       : num  16 66.8 62.4 76.9 72.5 97.9 92.8 66.2 53.5 96 ...
 $ AIR_E.1                     : num  49.2 99.1 85.1 87.3 99.4 84.9 97 97.7 99.5 50.2 ...
 $ WATER_E.1                   : num  61.6 96.5 27.1 74.9 28 62.5 79.9 48.5 62.8 52.3 ...
 $ BIODIVERSITY.1              : num  58.9 4 36.6 33.6 16 78.1 71.6 29 62.5 10 ...
 $ FOREST                      : num  95.4 100 100 75.9 70.1 100 100 100 0 100 ...
 $ FISH                        : num  87.3 62.5 50 58.8 NA 96.7 NA NA NA 47.4 ...
 $ AGRICULTURE                 : num  61.3 75.6 72.3 79.9 94.2 78.7 76.4 71.4 95.9 80.8 ...
 $ CLIMATE.1                   : num  74.6 93.4 26.6 82.3 87.2 42.5 79.9 77.1 81.5 69.5 ...
 $ ACSAT_pt                    : num  19.3 89.5 97.7 89.5 80.1 100 100 46.2 25.1 100 ...
 $ WATSUP_pt                   : num  20.2 93.2 100 93.2 86.4 100 100 61 64.3 100 ...
 $ DALY_pt                     : num  0 99.5 98.9 98 98.2 99.6 99.8 93 26.1 99.6 ...
 $ INDOOR_pt                   : num  0 47.4 94.7 94.7 72.2 94.7 94.7 48.4 0 94.7 ...
 $ PM10_pt                     : num  40 70.1 11.2 51.3 59 100 87.8 67 84.1 95.4 ...
 $ OZONE_H_pt                  : num  0 99.1 100 92.4 100 100 99.2 100 99.4 99.7 ...
 $ SO2_pt                      : num  98.4 98.5 70.2 98.8 98.8 69.9 94.4 95.4 99.3 0.6 ...
 $ OZONE_E_pt                  : num  0 99.8 100 75.7 100 100 99.6 100 99.6 99.8 ...
 $ WATQI_pt                    : num  29.4 93 0 76.4 31.7 75.3 59.8 31.7 25.6 59.6 ...
 $ WATSTR_pt                   : num  98.3 90.3 100 100 100 73.4 100 100 63 100 ...
 $ WATQI_GEMS.station.data     : num  NA 93 NA 76.4 NA 75.3 59.8 NA NA 59.6 ...
 $ FORGRO_pt                   : num  95.4 100 100 75.9 70.1 100 100 100 0 100 ...
 $ CRI_pt                      : num  99.7 5.5 100 39.8 37.7 86.1 80.1 46.2 84.1 9.6 ...
 $ EFFCON_pt                   : num  95.7 1.6 2.3 33.9 10.4 79 63 11.9 40.9 11.5 ...
 $ AZE_pt                      : num  0 NA NA 40 0 69.4 NA NA NA NA ...
 $ MPAEEZ_pt                   : num  14 6 1 2 100 78 100 100 100 0 ...
 $ EEZTD_pt                    : num  74.5 25.1 0 17.5 NA 93.5 NA NA NA 0 ...
 $ MTI_pt                      : num  100 100 100 100 NA 100 NA NA NA 94.9 ...
 $ IRRSTR_pt                   : num  97.5 100 51.8 74.6 97 50.7 100 82.9 100 100 ...
 $ AGINT_pt                    : num  100 90.2 100 78.4 94.5 79.6 63.2 91.1 92 87.1 ...
 $ AGSUB_pt                    : num  100 100 100 100 100 99.9 22.8 100 100 22.8 ...
 $ BURNED_pt                   : num  0 78.9 96.1 55.7 79.5 63.3 96 78.4 87.7 98.6 ...
 $ PEST_pt                     : num  9.1 9.1 13.6 90.9 100 100 100 4.5 100 95.5 ...
 $ GHGCAP_pt                   : num  65.8 98.8 38.6 87.1 98 45.4 81.6 88.7 94 77.7 ...
 $ CO2IND_pt                   : num  95 85 32.1 92.7 78.3 76.2 82.3 97.1 100 59.7 ...
 $ CO2KWH_pt                   : num  63 96.3 9 67 85.1 5.9 75.7 45.6 50.5 71.1 ...
 $ ACSAT                       : num  31 91 98 91 83 100 100 54 36 100 ...
 $ WATSUP                      : num  53 96 100 96 92 100 100 77 79 100 ...
 $ DALY                        : num  109 0.3 0.6 1.1 1 0.2 0.1 3.9 41 0.2 ...
 $ INDOOR                      : num  95 50 5 5 26.4 5 5 49 95 5 ...
 $ PM10                        : num  91.4 55.5 125.6 77.9 68.7 ...
 $ OZONE_H                     : num  4948.8 15.8 0 140.4 0 ...
 $ SO2                         : num  0.7 0.6 12.6 0.5 0.5 12.7 2.4 1.9 0.3 41.9 ...
 $ OZONE_E                     : num  1.36e+09 6.81e+05 2.63e+01 9.96e+07 0.00 ...
 $ WATQI                       : num  57.5 95.8 39.9 85.8 58.9 85.2 75.9 58.9 55.3 75.7 ...
 $ WATQI_GEMS.station.data.1   : num  NA 95.8 NA 85.8 NA 85.2 75.9 NA NA 75.7 ...
 $ WATSTR                      : num  5.5 0 41.6 24.1 68.6 45.7 0 31.4 0 49.8 ...
 $ FORGRO                      : num  1 1 1 0.9 0.9 1 1.1 1 0.6 1.1 ...
 $ CRI                         : num  0.5 0 0.5 0.2 0.2 0.4 0.4 0.2 0.4 0 ...
 $ EFFCON                      : num  9.6 0.2 0.2 3.4 1 7.9 6.3 1.2 4.1 1.2 ...
 $ AZE                         : num  0 NA NA 40 0 69.4 NA NA NA NA ...
 $ MPAEEZ                      : num  1.4 0.6 0.1 0.2 10 7.8 10 10 10 0 ...
 $ EEZTD                       : num  0.255 0.749 1 0.825 NA ...
 $ MTI                         : num  0.0016 0 0.0034 0.0044 NA 0.0014 NA NA NA -0.001 ...
 $ IRRSTR                      : num  2.2 0 41 21.6 2.5 41.9 0 14.6 0 0 ...
 $ AGINT                       : num  0 6.2 0 13.7 3.5 12.9 23.3 5.6 5.1 8.2 ...
 $ AGSUB                       : num  0 0 0 0 0 0 36 0 0 36 ...
 $ BURNED                      : num  15.3 2.9 0.5 6 2.8 5 0.5 2.9 1.7 0.2 ...
 $ PEST                        : num  2 2 3 20 22 22 22 1 22 21 ...
 $ GHGCAP                      : num  20 2.9 34.1 8.9 3.3 30.5 11.8 8.1 5.3 13.8 ...
 $ CO2IND                      : num  1.2 1.9 5.5 1.4 2.3 2.5 2.1 1.1 0.8 3.6 ...
 $ CO2KWH                      : num  343 34 844 306 138 873 225 505 459 268 ...
# Build the per-country table that is joined onto the world map.
#
# * ZikaOc holds one row per Country/Year, so the yearly counts are summed to
#   a single value per country; the map shows one value per country and its
#   title covers 2015 to 2016 as a whole.
# * Country names derived from `location` use underscores (for example
#   "El_Salvador"), whereas countryExData$Country uses spaces, so underscores
#   are replaced before the name-based merge. Without this, those countries
#   silently drop out of the merge.
# * Some countryExData names carry trailing blanks (see "United Arab
#   Emirates" in the str() output), so the lookup key is trimmed as well.
zika_by_country <- ZikaOc %>%
  group_by(Country) %>%
  summarise(cases = sum(cases)) %>%
  ungroup() %>%
  mutate(Country = gsub("_", " ", Country, fixed = TRUE))

country_lookup <- countryExData
country_lookup$Country <- trimws(country_lookup$Country)

Test <- merge(country_lookup, zika_by_country, by = "Country")
sPDF <- joinCountryData2Map(
  Test,
  joinCode = "ISO3",
  nameJoinColumn = "ISO3V10"
)
11 codes from your data successfully matched countries in the map
0 codes from your data failed to match with a country code in the map
234 codes from the map weren't represented in your data
# Open a plotting window shaped for a world map, then colour each country by
# its `cases` value and add a legend.
mapDevice()
mapCountryData(
  sPDF,
  nameColumnToPlot = 'cases',
  addLegend = TRUE,
  mapTitle = "Global Zika virus epidemic cases from 2015 to 2016"
)

#mapBubbles(sPDF,nameZSize="cases",nameZColour="Country",colourPalette="PuBuGn",oceanCol="lightblue",landCol="wheat")

Zika virus epidemic cases occurred mainly in North and South America in 2015 and 2016.

# Return the rows of `data` whose `location` starts with `prefix`.
#
# `location` has the form "Country-Province[-...]", so the country is always
# the leading part. Matching on the prefix (rather than anywhere in the
# string) keeps a country subset from also collecting rows of other countries
# whose province or municipality name merely contains the same word, e.g. a
# search for "Mexico" matching a location such as "United_States-New_Mexico".
# `which()` drops NA locations, as an index-based grep() selection would.
rows_for_country <- function(data, prefix) {
  data[which(startsWith(as.character(data$location), prefix)), ]
}

# One subset per country. "United_States" is intentionally a prefix so that
# "United_States_Virgin_Islands" rows stay in the USA subset.
USA <- rows_for_country(zika, "United_States")
Mexico <- rows_for_country(zika, "Mexico")
Panama <- rows_for_country(zika, "Panama")
Nicaragua <- rows_for_country(zika, "Nicaragua")
Haiti <- rows_for_country(zika, "Haiti")
Guatemala <- rows_for_country(zika, "Guatemala")
El_salvador <- rows_for_country(zika, "El_Salvador")
Ecuador <- rows_for_country(zika, "Ecuador")
Dominican_republic <- rows_for_country(zika, "Dominican_Republic")
Colombia <- rows_for_country(zika, "Colombia")
Argentina <- rows_for_country(zika, "Argentina")
Brazil <- rows_for_country(zika, "Brazil")
# Count the observations (rows) of one country subset per report field
# (`data_field`) and year. The result is ungrouped.
count_fields_by_year <- function(country_data) {
  country_data %>%
    group_by(data_field, Year) %>%
    summarise(Cases = n()) %>%
    ungroup()
}

# Horizontal bar chart of the counts per report field, one panel per year.
# `country_label` is the prefix of the value-axis title.
plot_field_counts <- function(field_counts, country_label) {
  ggplot(field_counts, aes(x = data_field, y = Cases)) +
    geom_bar(stat = "identity", colour = "white") +
    facet_wrap(~ Year) +
    coord_flip() +
    labs(
      y = paste0(country_label, ": Reported Zika cases"),
      x = "cases types"
    )
}

# Per-country count tables (g_<country>) and their plots (g1 ... g12).
g_USA <- count_fields_by_year(USA)
g1 <- plot_field_counts(g_USA, "USA")

g_Mexico <- count_fields_by_year(Mexico)
g2 <- plot_field_counts(g_Mexico, "Mexico")

g_Panama <- count_fields_by_year(Panama)
g3 <- plot_field_counts(g_Panama, "Panama")

g_Nicaragua <- count_fields_by_year(Nicaragua)
g4 <- plot_field_counts(g_Nicaragua, "Nicaragua")

g_Haiti <- count_fields_by_year(Haiti)
g5 <- plot_field_counts(g_Haiti, "Haiti")

g_Guatemala <- count_fields_by_year(Guatemala)
g6 <- plot_field_counts(g_Guatemala, "Guatemala")

g_El_salvador <- count_fields_by_year(El_salvador)
g7 <- plot_field_counts(g_El_salvador, "El_salvador")

g_Ecuador <- count_fields_by_year(Ecuador)
g8 <- plot_field_counts(g_Ecuador, "Ecuador")

g_Dominican_republic <- count_fields_by_year(Dominican_republic)
g9 <- plot_field_counts(g_Dominican_republic, "Dominican_republic")

g_Colombia <- count_fields_by_year(Colombia)
g10 <- plot_field_counts(g_Colombia, "Colombia")

g_Argentina <- count_fields_by_year(Argentina)
g11 <- plot_field_counts(g_Argentina, "Argentina")

g_Brazil <- count_fields_by_year(Brazil)
g12 <- plot_field_counts(g_Brazil, "Brazil")
# USA: most Zika cases were reported as travel-associated or local. Local
# cases were mainly reported from Puerto Rico, New York, Florida, and the
# Virgin Islands.
print(g1)

print(g2)

print(g3)

print(g4)

print(g5)

print(g6)

print(g7)

print(g8)

print(g9)

print(g10)

print(g11)

print(g12)

