Project 2B Malaria Mortality

library(stringr)

# Download the WHO malaria-deaths table from GitHub and inspect its structure.
# The file parses as a single column, so each row is one unsplit line of text.
# NOTE(review): the "ï.." prefix on the column name in the output below looks
# like a byte-order mark decoded with the wrong encoding -- confirm before
# referring to this column by name.
gt <- read.csv(
  file = "https://raw.githubusercontent.com/igukusamuel/DATA-607-Assignments/master/Malaria_related_deaths.csv"
)
str(gt)
## 'data.frame':    9 obs. of  1 variable:
##  $ ï..Estimated.number.of.malaria.deaths.by.WHO.region: Factor w/ 9 levels " Americas 480 450 400 400 300 320 460 630",..: 5 3 1 2 4 6 7 9 8
# The WHO table embedded as one multi-line string; the extraction steps below
# read from `gdata`, not from the downloaded `gt`.
# Layout: a title line, a header line with the years 2010-2017, then one line
# per region. Figures appear to use a space as the thousands separator
# (e.g. "555 000"), and several rows end with a trailing space.
gdata <- "Estimated number of malaria deaths by WHO region
Number of deaths 2010 2011 2012 2013 2014 2015 2016 2017
African 555 000 517 000 489 000 467 000 446 000 432 000 413 000 403 000
Americas 480 450 400 400 300 320 460 630
Eastern Mediterranean 8 070 7 280 7 340 6 750 8 520 8 660 8 160 8 300 
European 0 0 0 0 0 0 0 0 
South-East Asia 39 800 32 800 28 400 21 800 24 100 25 200 25 600 19 700 
Western Pacific 3 770 3 340 3 850 4 600 4 420 2 860 3 510 3 620 
World 607 000 561 000 529 000 500 000 483 000 469 000 451 000 435 000 
World (children aged under 5 years) 444 600 405 000 371 000 344 000 322 000 302 000 283 000 266 000"
# Collect every run of three or more digits from the raw text. `gdata` is a
# single string, so the list returned by str_extract_all() has exactly one
# element, which is taken directly.
# NOTE(review): because the quantifier is {3,}, digit groups shorter than
# three characters (the "8" in "8 070", every "0" in the European row) are not
# captured, and a space-separated figure such as "555 000" comes back as two
# separate tokens. Code below indexes this vector by fixed position.
num_values <- str_extract_all(string = gdata, pattern = "\\d{3,}")[[1]]
str(num_values)
##  chr [1:88] "2010" "2011" "2012" "2013" "2014" "2015" "2016" "2017" ...

# Collect every run of five or more letters, hyphens or spaces: the title, the
# header label, the region names, and fragments of the last row's label.
text <- str_extract_all(string = gdata, pattern = "[[:alpha:]- ]{5,}")[[1]]
str(text)
##  chr [1:12] "Estimated number of malaria deaths by WHO region" ...
# Column labels: the first eight extracted tokens are the years 2010-2017.
years <- num_values[1:8]

# Every second token from position 11 onward, seeded with the literal "555".
# The candidate positions (11, 13, ..., 119) are clipped to the positions that
# actually exist in `num_values`, so indexing past the end of the vector no
# longer pads the result with NA. A single vectorised subset replaces growing
# the vector one element at a time.
num_mala <- c(
  "555",
  num_values[intersect(seq(11, 120, by = 2), seq_along(num_values))]
)
num_mala
##  [1] "555" "517" "489" "467" "446" "432" "413" "403" "480" "400" "300"
## [12] "460" "070" "340" "520" "160" "800" "400" "100" "600" "770" "850"
## [23] "420" "510" "607" "561" "529" "500" "483" "469" "451" "435" "444"
## [34] "405" "371" "344" "322" "302" "283" "266"
# Lay out the 80 tokens that follow the year labels as rows of eight and view
# them as numbers.
# The conversion is done in place with `storage.mode<-`, which keeps the `dim`
# attribute; as.numeric() would drop it and return a flat vector in
# column-major order instead of a 10 x 8 matrix.
malaria <- matrix(num_values[9:88], ncol = 8, byrow = TRUE)
storage.mode(malaria) <- "double"
malaria
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
##  [1,]  555    0  517    0  489    0  467    0
##  [2,]  446    0  432    0  413    0  403    0
##  [3,]  480  450  400  400  300  320  460  630
##  [4,]   70  280  340  750  520  660  160  300
##  [5,]  800  800  400  800  100  200  600  700
##  [6,]  770  340  850  600  420  860  510  620
##  [7,]  607    0  561    0  529    0  500    0
##  [8,]  483    0  469    0  451    0  435    0
##  [9,]  444  600  405    0  371    0  344    0
## [10,]  322    0  302    0  283    0  266    0
# Elements 3 through 8 of the extracted labels; not referenced again in this
# section.
Region <- text[3:8]

# Character matrix of the 80 tokens after the year labels, ten rows of eight,
# with the years as column names. The printed result shows that a figure
# written with a space (e.g. "555 000") occupies two cells, which is why the
# African and World series each span several rows (AfricaA/AfricaB,
# WorldA/WorldB, WorldC1/WorldC2).
malaria <- matrix(
  num_values[9:88],
  nrow = 10,
  ncol = 8,
  byrow = TRUE,
  dimnames = list(
    c(
      "AfricaA", "AfricaB", "America", "Eastern Mediterranean",
      "South-East Asia", "Western Pacific",
      "WorldA", "WorldB", "WorldC1", "WorldC2"
    ),
    years
  )
)
malaria
##                       2010  2011  2012  2013  2014  2015  2016  2017
## AfricaA               "555" "000" "517" "000" "489" "000" "467" "000"
## AfricaB               "446" "000" "432" "000" "413" "000" "403" "000"
## America               "480" "450" "400" "400" "300" "320" "460" "630"
## Eastern Mediterranean "070" "280" "340" "750" "520" "660" "160" "300"
## South-East Asia       "800" "800" "400" "800" "100" "200" "600" "700"
## Western Pacific       "770" "340" "850" "600" "420" "860" "510" "620"
## WorldA                "607" "000" "561" "000" "529" "000" "500" "000"
## WorldB                "483" "000" "469" "000" "451" "000" "435" "000"
## WorldC1               "444" "600" "405" "000" "371" "000" "344" "000"
## WorldC2               "322" "000" "302" "000" "283" "000" "266" "000"

### Tidying