Read in file from source.
# Load the raw CSV from the GitHub repository; every column is read as
# character because the file carries title/header rows above the data.
MALARIA <- readr::read_csv(
  file = "https://raw.githubusercontent.com/sadia-perveen/Project-2/master/Child-health_malaria_CoD_Feb-2018_WHO_MCEE_-1.csv"
)
## Warning: Missing column names filled in: 'X1' [1], 'X2' [2], 'X3' [3],
## 'X4' [4], 'X5' [5], 'X6' [6], 'X7' [7], 'X8' [8], 'X9' [9], 'X10' [10],
## 'X11' [11], 'X12' [12], 'X13' [13], 'X14' [14]
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_character(),
## X4 = col_character(),
## X5 = col_character(),
## X6 = col_character(),
## X7 = col_character(),
## X8 = col_character(),
## X9 = col_character(),
## X10 = col_character(),
## X11 = col_character(),
## X12 = col_character(),
## X13 = col_character(),
## X14 = col_character()
## )
# Preview the first six rows to inspect the layout of the raw import.
head(MALARIA, n = 6L)
## # A tibble: 6 x 14
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 2 Estim~ <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 3 Last ~ <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 4 <NA> <NA> <NA> Tota~ Tota~ Neon~ Post~ Unde~ Neon~ Post~ Unde~ % ne~
## 5 iso3 Count~ year nnd pnd neo8 post8 ufiv~ rneo8 rpos~ rufi~ fneo8
## 6 <NA> Global 2000 4,00~ 6,04~ - 639,~ 639,~ - 5 5 0%
## # ... with 2 more variables: X13 <chr>, X14 <chr>
Data cleanup: 1. Drop the first column; it is not needed. 2. Fill the X2 and X3 values in row 4 from the corresponding values in row 5. 3. Drop all rows containing NAs using the drop_na function. 4. Use the values in the first row of the dataset as the column names. 5. Remove the first two rows using slice. 6. Replace the ‘-’ placeholder with 0 using the mutate_if function.
# Clean up the raw import. The file has title/header rows above the data, so
# the real column names have to be recovered from the rows themselves.
# 1. Drop the first column (X1).
MALARIA <- select(MALARIA, -c(X1))
# 2. Row 4 holds the descriptive column labels but has no entries for X2 and
#    X3; copy them from row 5 so that row 4 is not discarded by drop_na().
MALARIA$X2[4] <- MALARIA$X2[5]
MALARIA$X3[4] <- MALARIA$X3[5]
# 3. Drop every row that contains at least one NA (the leading title rows).
MALARIA <- MALARIA %>% drop_na()
# 4. Use the first remaining row (the descriptive labels) as column names.
names(MALARIA) <- as.character(unlist(MALARIA[1, ]))
# 5. Remove the two header rows (labels and short codes), keeping only data.
MALARIA <- slice(MALARIA, 3:n())
# 6. Replace the "-" placeholder with "0". The pattern is anchored so that
#    only cells consisting solely of a hyphen are rewritten; hyphens inside
#    other values (e.g. hyphenated country names) are left intact.
MALARIA <- mutate_if(
  MALARIA,
  is.character,
  str_replace_all,
  pattern = "^\\s*-\\s*$",
  replacement = "0"
)
Generate some comparison data: in this case, total neonatal and post-neonatal deaths by country across all years. The totals are divided by 10,000 to keep the numbers readable.
# Per-country totals of neonatal and post-neonatal deaths over all rows,
# expressed in units of 10,000 deaths and sorted by country name.
DEATHS_COUNTRY <- MALARIA %>%
  # Strip thousands separators and parse the counts before aggregating.
  transmute(
    `Country/area name`,
    neonatal_count = as.integer(gsub(",", "", `Total Neonatal deaths`)),
    post_neonatal_count = as.integer(
      gsub(",", "", `Total Post-Neonatal deaths`)
    )
  ) %>%
  group_by(`Country/area name`) %>%
  summarise(
    neoNatal = sum(neonatal_count) / 10000,
    postNeoNatal = sum(post_neonatal_count) / 10000
  ) %>%
  arrange(`Country/area name`)
Plot the data in a barplot.
# Grouped bar plot of the two summary series, one group of bars per country.
# barplot() expects one row per series, so the two numeric columns are
# converted to a matrix and transposed.
barplot(
  t(as.matrix(DEATHS_COUNTRY[, 2:3])),
  beside = TRUE,                                  # bars side by side
  names.arg = DEATHS_COUNTRY$`Country/area name`,
  legend.text = TRUE,                             # legend from row names
  las = 2,                                        # perpendicular axis labels
  cex.names = 0.5,                                # shrink the country labels
  main = "Deaths NeoNatal/Post Neo Natal By Country (2000-2016)",
  xlab = "Countries",
  ylab = "# Deaths/10000"
)