R Markdown

Tidying data using the tidyverse, available at http://tidyverse.org/:

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.3.3
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
#1) Read the measles incidence table downloaded from the who.org website and skip the first line:

# Import the WHO measles incidence table. The first line of the file is
# skipped, and a field holding a single space is read as NA.
# NOTE(review): the path is absolute and machine-specific.
measle <- read_csv(
  file = "C:/Users/ambra/Desktop/Data 607/W6/Measle_incidence_by_country.csv",
  skip = 1,
  na = " "
)
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   Country = col_character()
## )
## See spec(...) for full column specifications.
# Preview the first rows of the imported table.
head(measle)
## # A tibble: 6 × 42
##               Country `2014` `2013` `2012` `2011` `2010` `2009` `2008`
##                 <chr>  <int>  <int>  <int>  <int>  <int>  <int>  <int>
## 1         Afghanistan    492    430   2787   3013   1989   2861   1599
## 2             Albania     NA      0      9     28     10      0     NA
## 3             Algeria      0     25     18    112    103    107    217
## 4             Andorra     NA      0      0      0      0      0      0
## 5              Angola  11699   8523   4458   1449   1190   2807    265
## 6 Antigua and Barbuda      0      0      0      0      0      0      0
## # ... with 34 more variables: `2007` <int>, `2006` <int>, `2005` <int>,
## #   `2004` <int>, `2003` <int>, `2002` <int>, `2001` <int>, `2000` <int>,
## #   `1999` <int>, `1998` <int>, `1997` <int>, `1996` <int>, `1995` <int>,
## #   `1994` <int>, `1993` <int>, `1992` <int>, `1991` <int>, `1990` <int>,
## #   `1989` <int>, `1988` <int>, `1987` <int>, `1986` <int>, `1985` <int>,
## #   `1984` <int>, `1983` <int>, `1982` <int>, `1981` <int>, `1980` <int>,
## #   `1979` <int>, `1978` <int>, `1977` <int>, `1976` <int>, `1975` <int>,
## #   `1974` <int>
# Count the columns: `Country` plus one column per year.
ncol(measle)
## [1] 42
 ## Compute the average number of reported measles cases per country, sort in ascending and then descending order, select the bottom and top 10 countries, and plot


# Add an `average` column: the row mean over every column except the first
# (`Country`), ignoring missing values and rounded to a whole number.
avgmeasle <- mutate(
  measle,
  average = round(rowMeans(measle[, -1], na.rm = TRUE))
)

# First ten rows when sorted by increasing average (lowest averages).
minmeasle <- slice(arrange(avgmeasle, average), 1:10)

# First ten rows when sorted by decreasing average (highest averages).
maxmeasle <- slice(arrange(avgmeasle, desc(average)), 1:10)

# Stack the two selections: lowest ten first, then highest ten.
y <- bind_rows(minmeasle, maxmeasle)


## Look up the ISO3 code of every country in avgmeasle
library(countrycode)
## Warning: package 'countrycode' was built under R version 3.3.3
# Convert each country name in the averaged table to its ISO3 code.
countries <- countrycode(
  sourcevar = avgmeasle$Country,
  origin = "country.name",
  destination = "iso3c"
)

# Attach the codes to the averaged table as a new `ISO` column.
yiso <- mutate(avgmeasle, ISO = countries)


##map using rworldmap
library(rworldmap)
## Warning: package 'rworldmap' was built under R version 3.3.3
## Loading required package: sp
## ### Welcome to rworldmap ###
## For a short introduction type :   vignette('rworldmap')
# Join the per-country table to the rworldmap country map, matching the
# `ISO` column of `yiso` against the map's ISO3 codes.
measleMap <- joinCountryData2Map(
  dF = yiso,
  joinCode = "ISO3",
  nameJoinColumn = "ISO"
)
## 194 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 49 codes from the map weren't represented in your data
##In the future, I will work on adjusting  the map below
# Draw a world map coloured by each country's `average`, with categories
# chosen by quantiles. Countries without data are filled with a light gray
# and no legend is drawn.
mapCountryData(
  mapToPlot = measleMap,
  nameColumnToPlot = "average",
  catMethod = "quantiles",
  missingCountryCol = gray(0.8),
  addLegend = FALSE,
  mapTitle = "1974-2014 Avg of Reported cases of measles- country comparison"
)

# Reshape to long format: the year column names become the `Year` variable
# and the reported counts become the `Cases` variable.
# Every column except `Country` is a year, so the year columns are selected
# by excluding `Country` rather than by the fixed positions 2:42; this keeps
# working if the number of year columns changes. Rows with a missing count
# are dropped (`na.rm = TRUE`, spelled out because `T` can be reassigned).
measlen <- measle %>%
  gather("Year", "Cases", -Country, na.rm = TRUE)

# Total reported cases per year across all countries: count() with `wt`
# sums `Cases` within each `Year` and stores the result in column `n`.
# This yearly total is what the plot of change over time uses.
x <- measlen %>%
  count(Year, wt = Cases)

# Scatter plot of the yearly totals in `x`. The x axis is discrete, so only
# every fifth year from 1974 to 2014 gets a label. Points are coloured by
# year and the colour legend is hidden.
ggplot(data = x, mapping = aes(x = Year, y = n)) +
  geom_point(mapping = aes(colour = Year)) +
  scale_x_discrete(breaks = as.character(seq(1974, 2014, by = 5))) +
  ggtitle("Measle worldwide reported cases by year (Source: WHO)") +
  theme(legend.position = "none")

There has been a steep decrease in reported cases of measles worldwide between 1974 and 2014. The WHO dataset does not provide population sizes; therefore, the heat map above is based on the average number of reported cases per country rather than on incidence rates.