Get Rid Of Special Characters ] [ -
- Create standardized spacing in each value of DF for next pipe of tidying
- Use pipe to Tidy
Mean[min-max] columns into Mean, Min and Max columns for the respective columns
# Replace each "[", "]" and "-" in every cell with a single space, so that a
# "Mean[min-max]" value becomes the space-delimited "Mean min max ".
# One regex pass covers all three characters.
getting_tidy <- as_tibble(lapply(getting_tidy, function(x) {
  str_replace_all(x, "\\[|\\]|-", " ")
}))

# Split every "Mean min max" column into separate Mean / Min / Max columns.
#
# `sep = "\\s+"` is passed explicitly for every column. The default separator
# of separate() splits on any run of non-alphanumeric characters, which also
# breaks values at their decimal point (a min of 9.1 becomes the two pieces
# "9" and "1"), shifting the wrong numbers into the Min/Max columns.
#
# `extra = "drop"` silently discards the empty piece left by the trailing
# space. Values with fewer than three pieces still raise a warning and are
# filled with NA, so genuinely malformed cells remain visible.
final_df <- getting_tidy %>%
  separate(
    `Incidence of tuberculosis (per 100 000 population per year)`,
    into = c(
      'Mean_Incidence_Tuberculosis_100,000',
      'Min_Tuberculosis',
      "Max_Tuberculosis"
    ),
    sep = "\\s+",
    extra = "drop"
  ) %>%
  separate(
    `Antiretroviral therapy coverage among people with HIV infection eligible for ART according to 2010 guidelines (%)`,
    into = c(
      'Mean_Antiretroviral_coverage',
      'Min_Antiretroviral_coverage',
      'Max_Antiretroviral_coverage'
    ),
    sep = "\\s+",
    extra = "drop"
  ) %>%
  separate(
    `Infant mortality rate (probability of dying between birth and age 1 per 1000 live births)`,
    into = c(
      'Mean_Infant mortality_Rate/1000',
      'Min_Infant_Mortality rate',
      "Max_Infant_Mortality_Rate"
    ),
    sep = "\\s+",
    extra = "drop"
  ) %>%
  separate(
    `Under-five mortality rate (probability of dying by age 5 per 1000 live births)`,
    into = c(
      'Mean_Under-five_Mortality_Rate/1000',
      'Min_Under-five_Mortality_Rate',
      "Max_Under-five_Mortality_Rate"
    ),
    sep = "\\s+",
    extra = "drop"
  ) %>%
  separate(
    `Tuberculosis treatment coverage`,
    into = c(
      'Mean_Tuberculosis_Coverage',
      'Min_Tuberculosis_Coverage',
      "Max_Tuberculosis_Coverage"
    ),
    sep = "\\s+",
    extra = "drop"
  )
## Warning: Expected 3 pieces. Additional pieces discarded in 56 rows [1, 2,
## 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Expected 3 pieces. Additional pieces discarded in 2 rows [8, 22].
## Warning: Expected 3 pieces. Missing pieces filled with `NA` in 2 rows [36,
## 50].
## Warning: Expected 3 pieces. Additional pieces discarded in 55 rows [1, 2,
## 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, ...].
## Warning: Expected 3 pieces. Missing pieces filled with `NA` in 1 rows [18].
More Piping
- First column needs to be renamed to
Country column
- Eliminate the
.integers after the values in Country column
- Get rid of some columns
- Trim DF
- Convert columns to numeric for analysis
# Reshape final_df in place (%<>% pipes and assigns back):
#   1. rename the first column from 'United_states' to 'Country';
#   2. overwrite Country with clean labels. NOTE(review): this assumes the
#      data holds exactly 4 x 14 = 56 rows, already ordered Uganda, Ukraine,
#      United Kingdom, United States -- confirm against the raw data;
#   3. keep only the 19 columns used in the analysis (selected by position);
#   4. sort rows by Country.
final_df %<>%
  plyr::rename(c('United_states' = 'Country')) %>%
  mutate(
    Country = rep(
      c("Uganda", "Ukraine", "United_Kingdom", "United_States"),
      each = 14
    )
  ) %>%
  select(colnames(.)[c(1, 2, 10:17, 21:23, 30:35)]) %>%
  arrange(Country)

# Strip leading/trailing whitespace from every column; `[]` keeps the data
# frame structure while replacing its columns. All columns are character
# vectors after this step.
final_df[] <- lapply(final_df, str_trim)

# Convert the measurement columns (3 to 19) to numeric. Columns 1 and 2
# (Country, Year) stay character. Cells that are not valid numbers become NA
# with a coercion warning.
final_df[3:19] <- lapply(final_df[3:19], as.numeric)

final_df <- as_tibble(final_df)
kable(final_df)
| Uganda |
2015 |
NA |
NA |
202 |
120 |
304 |
39.2 |
35.4 |
43.3 |
38 |
93 |
NA |
52 |
34 |
87 |
55.9 |
49.3 |
63.3 |
| Uganda |
2014 |
NA |
NA |
202 |
127 |
294 |
41.0 |
37.8 |
44.5 |
38 |
97 |
0 |
56 |
39 |
89 |
60.1 |
54.4 |
66.4 |
| Uganda |
2005 |
890 |
NA |
233 |
156 |
325 |
74.4 |
70.3 |
78.7 |
38 |
155 |
NA |
62 |
44 |
92 |
120.3 |
112.8 |
128.3 |
| Uganda |
2004 |
840 |
NA |
240 |
169 |
323 |
80.5 |
76.3 |
85.0 |
39 |
164 |
NA |
66 |
49 |
94 |
131.3 |
123.5 |
139.6 |
| Uganda |
2003 |
800 |
NA |
248 |
172 |
336 |
86.7 |
82.0 |
91.4 |
39 |
173 |
NA |
65 |
48 |
94 |
142.5 |
133.9 |
151.3 |
| Uganda |
2002 |
760 |
NA |
256 |
171 |
358 |
92.5 |
87.5 |
97.5 |
40 |
181 |
NA |
62 |
44 |
92 |
153.1 |
143.8 |
162.6 |
| Uganda |
2013 |
NA |
NA |
203 |
134 |
288 |
43.9 |
40.8 |
47.2 |
38 |
103 |
0 |
60 |
42 |
91 |
65.0 |
59.4 |
71.1 |
| Uganda |
2012 |
1320 |
NA |
205 |
135 |
289 |
46.5 |
43.2 |
50.0 |
38 |
109 |
NA |
60 |
43 |
91 |
70.2 |
64.1 |
76.8 |
| Uganda |
2011 |
1310 |
NA |
207 |
143 |
283 |
49.5 |
46.1 |
53.3 |
38 |
114 |
NA |
64 |
47 |
92 |
75.5 |
69.1 |
82.3 |
| Uganda |
2010 |
1240 |
5 |
210 |
139 |
296 |
52.7 |
49.2 |
56.4 |
38 |
119 |
NA |
60 |
43 |
91 |
81.0 |
74.6 |
87.7 |
| Uganda |
2009 |
1200 |
4 |
213 |
140 |
302 |
55.7 |
52.2 |
59.3 |
38 |
124 |
NA |
60 |
42 |
91 |
86.4 |
80.1 |
93.0 |
| Uganda |
2008 |
1150 |
NA |
217 |
145 |
304 |
58.9 |
55.4 |
62.5 |
38 |
130 |
NA |
61 |
44 |
92 |
92.6 |
86.2 |
99.4 |
| Uganda |
2007 |
1070 |
NA |
222 |
146 |
313 |
63.3 |
59.6 |
67.3 |
38 |
137 |
NA |
60 |
43 |
92 |
100.5 |
93.6 |
107.7 |
| Uganda |
2006 |
990 |
NA |
227 |
150 |
319 |
68.6 |
64.7 |
72.7 |
38 |
145 |
NA |
61 |
43 |
92 |
109.8 |
102.7 |
117.3 |
| Ukraine |
2015 |
NA |
NA |
91 |
59 |
130 |
8.1 |
7.8 |
8.4 |
3 |
5 |
NA |
74 |
52 |
110 |
9.4 |
9.1 |
9.8 |
| Ukraine |
2014 |
NA |
NA |
94 |
61 |
135 |
8.4 |
8.1 |
8.7 |
3 |
5 |
0 |
75 |
52 |
120 |
9.8 |
9.5 |
10.1 |
| Ukraine |
2005 |
6410 |
87 |
127 |
82 |
181 |
12.5 |
12.0 |
12.9 |
4 |
6 |
NA |
67 |
47 |
100 |
14.5 |
13.9 |
15.0 |
| Ukraine |
2004 |
6000 |
87 |
127 |
82 |
182 |
13.0 |
12.3 |
13.6 |
4 |
6 |
NA |
NA |
NA |
NA |
15.1 |
14.3 |
15.8 |
| Ukraine |
2003 |
5170 |
88 |
126 |
81 |
180 |
13.7 |
12.8 |
14.5 |
4 |
6 |
NA |
130 |
91 |
200 |
15.9 |
14.8 |
16.8 |
| Ukraine |
2002 |
4590 |
89 |
123 |
80 |
176 |
14.3 |
13.2 |
15.4 |
4 |
7 |
NA |
68 |
48 |
110 |
16.7 |
15.4 |
18.0 |
| Ukraine |
2013 |
NA |
88 |
96 |
62 |
138 |
8.8 |
8.5 |
9.0 |
3 |
5 |
0 |
84 |
59 |
130 |
10.2 |
9.9 |
10.5 |
| Ukraine |
2012 |
8670 |
89 |
101 |
65 |
144 |
9.2 |
8.9 |
9.4 |
3 |
5 |
NA |
90 |
63 |
140 |
10.7 |
10.3 |
11.0 |
| Ukraine |
2011 |
8170 |
90 |
105 |
68 |
150 |
9.6 |
9.3 |
9.9 |
3 |
5 |
NA |
71 |
50 |
110 |
11.2 |
10.8 |
11.5 |
| Ukraine |
2010 |
7590 |
94 |
110 |
71 |
157 |
10.1 |
9.8 |
10.4 |
3 |
6 |
NA |
67 |
47 |
100 |
11.7 |
11.4 |
12.1 |
| Ukraine |
2009 |
7130 |
94 |
115 |
74 |
164 |
10.6 |
10.3 |
10.9 |
4 |
6 |
NA |
68 |
48 |
110 |
12.3 |
12.0 |
12.7 |
| Ukraine |
2008 |
8370 |
87 |
119 |
77 |
170 |
11.1 |
10.8 |
11.4 |
4 |
6 |
NA |
69 |
48 |
110 |
12.9 |
12.5 |
13.2 |
| Ukraine |
2007 |
7930 |
87 |
123 |
79 |
175 |
11.6 |
11.3 |
11.9 |
4 |
6 |
NA |
66 |
46 |
100 |
13.4 |
13.1 |
13.8 |
| Ukraine |
2006 |
7110 |
87 |
125 |
81 |
179 |
12.0 |
11.7 |
12.3 |
4 |
6 |
NA |
71 |
49 |
110 |
14.0 |
13.6 |
14.3 |
| United_Kingdom |
2015 |
NA |
NA |
10 |
9 |
1 |
3.7 |
3.6 |
3.9 |
2 |
4 |
NA |
89 |
81 |
98 |
4.4 |
4.3 |
4.6 |
| United_Kingdom |
2014 |
NA |
NA |
11 |
10 |
13 |
3.8 |
3.7 |
3.9 |
2 |
4 |
0 |
89 |
81 |
98 |
4.5 |
4.4 |
4.6 |
| United_Kingdom |
2005 |
33820 |
37 |
15 |
14 |
17 |
5.1 |
5.0 |
5.2 |
3 |
4 |
NA |
89 |
81 |
98 |
6.0 |
5.9 |
6.2 |
| United_Kingdom |
2004 |
32430 |
39 |
13 |
12 |
15 |
5.3 |
5.2 |
5.4 |
2 |
4 |
NA |
89 |
81 |
98 |
6.2 |
6.0 |
6.3 |
| United_Kingdom |
2003 |
30450 |
40 |
13 |
12 |
15 |
5.4 |
5.3 |
5.4 |
2 |
4 |
NA |
89 |
81 |
98 |
6.3 |
6.1 |
6.4 |
| United_Kingdom |
2002 |
29390 |
40 |
13 |
12 |
14 |
5.4 |
5.3 |
5.5 |
2 |
4 |
NA |
89 |
81 |
98 |
6.3 |
6.2 |
6.5 |
| United_Kingdom |
2013 |
NA |
28 |
13 |
12 |
14 |
3.9 |
3.8 |
4.0 |
2 |
4 |
0 |
89 |
81 |
98 |
4.6 |
4.5 |
4.7 |
| United_Kingdom |
2012 |
34640 |
28 |
14 |
13 |
16 |
4.1 |
4.0 |
4.2 |
2 |
4 |
NA |
89 |
81 |
98 |
4.8 |
4.7 |
4.9 |
| United_Kingdom |
2011 |
35270 |
29 |
15 |
13 |
16 |
4.2 |
4.2 |
4.3 |
2 |
4 |
NA |
89 |
81 |
98 |
5.0 |
4.9 |
5.1 |
| United_Kingdom |
2010 |
34510 |
30 |
14 |
13 |
15 |
4.4 |
4.3 |
4.5 |
2 |
4 |
NA |
89 |
81 |
98 |
5.2 |
5.1 |
5.3 |
| United_Kingdom |
2009 |
35260 |
33 |
15 |
13 |
16 |
4.6 |
4.5 |
4.7 |
2 |
4 |
NA |
89 |
81 |
98 |
5.4 |
5.3 |
5.5 |
| United_Kingdom |
2008 |
37110 |
34 |
15 |
13 |
16 |
4.8 |
4.7 |
4.9 |
2 |
4 |
NA |
89 |
81 |
98 |
5.6 |
5.5 |
5.7 |
| United_Kingdom |
2007 |
36480 |
34 |
15 |
13 |
16 |
4.9 |
4.8 |
5.0 |
3 |
4 |
NA |
89 |
81 |
98 |
5.8 |
5.6 |
5.9 |
| United_Kingdom |
2006 |
35620 |
36 |
15 |
14 |
17 |
5.0 |
4.9 |
5.1 |
3 |
4 |
NA |
89 |
81 |
98 |
5.9 |
5.8 |
6.0 |
| United_States |
2015 |
NA |
NA |
3 |
3 |
2 |
5.7 |
5.4 |
5.9 |
15 |
26 |
NA |
87 |
75 |
100 |
6.6 |
6.4 |
6.9 |
| United_States |
2014 |
NA |
NA |
3 |
2 |
2 |
5.8 |
5.7 |
6.0 |
16 |
27 |
0 |
87 |
75 |
100 |
6.8 |
6.6 |
7.0 |
| United_States |
2005 |
44740 |
NA |
5 |
5 |
4 |
6.8 |
6.7 |
6.9 |
19 |
33 |
NA |
87 |
75 |
100 |
8.0 |
7.8 |
8.1 |
| United_States |
2004 |
42260 |
NA |
5 |
7 |
4 |
6.9 |
6.8 |
7.0 |
19 |
33 |
NA |
87 |
75 |
100 |
8.1 |
7.9 |
8.2 |
| United_States |
2003 |
39960 |
NA |
5 |
9 |
5 |
6.9 |
6.7 |
7.0 |
19 |
33 |
NA |
87 |
75 |
100 |
8.1 |
8.0 |
8.3 |
| United_States |
2002 |
38590 |
NA |
6 |
5 |
2 |
6.9 |
6.8 |
7.0 |
19 |
33 |
NA |
87 |
75 |
100 |
8.2 |
8.1 |
8.4 |
| United_States |
2013 |
NA |
29 |
3 |
3 |
2 |
5.9 |
5.8 |
6.0 |
16 |
27 |
0 |
87 |
75 |
100 |
6.9 |
6.8 |
7.1 |
| United_States |
2012 |
52620 |
29 |
3 |
7 |
3 |
6.0 |
5.9 |
6.1 |
16 |
28 |
NA |
87 |
75 |
100 |
7.0 |
6.9 |
7.2 |
| United_States |
2011 |
50860 |
29 |
3 |
9 |
3 |
6.1 |
6.0 |
6.3 |
16 |
29 |
NA |
87 |
75 |
100 |
7.2 |
7.1 |
7.3 |
| United_States |
2010 |
48880 |
30 |
4 |
2 |
3 |
6.2 |
6.1 |
6.3 |
17 |
30 |
NA |
87 |
75 |
100 |
7.3 |
7.2 |
7.5 |
| United_States |
2009 |
47240 |
31 |
4 |
3 |
3 |
6.4 |
6.3 |
6.5 |
17 |
31 |
NA |
87 |
75 |
100 |
7.5 |
7.4 |
7.6 |
| United_States |
2008 |
48650 |
NA |
4 |
9 |
4 |
6.5 |
6.4 |
6.6 |
17 |
31 |
NA |
87 |
75 |
100 |
7.6 |
7.5 |
7.8 |
| United_States |
2007 |
48420 |
NA |
5 |
1 |
4 |
6.6 |
6.5 |
6.7 |
18 |
32 |
NA |
87 |
75 |
100 |
7.8 |
7.6 |
7.9 |
| United_States |
2006 |
47390 |
NA |
5 |
3 |
4 |
6.7 |
6.6 |
6.8 |
18 |
33 |
NA |
87 |
75 |
100 |
7.9 |
7.7 |
8.0 |
Visual Exploratory Analysis Of Infant Mortality
- GNP By Year
- Infant Mortality Rate by year
- Numerical vector of Infant Mortality Rate by year
# Dodged bar chart of gross national income per capita by year, one bar per
# country within each year. The income column is copied into a short `GNP`
# column so it can be used as the y aesthetic.
final_df %>%
  mutate(
    GNP = as.numeric(`Gross national income per capita (PPP int. $)`)
  ) %>%
  dplyr::group_by(Country) %>%
  ggplot(mapping = aes(x = Year, y = GNP)) +
  geom_bar(
    mapping = aes(fill = Country),
    stat = "identity",
    position = "dodge"
  ) +
  # Tilt the year labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "GNP By Year")
## Warning: Removed 12 rows containing missing values (geom_bar).

# Dodged bar chart of the mean infant mortality rate by year for every
# country in final_df.
final_df %>%
  dplyr::group_by(Country) %>%
  ggplot(mapping = aes(x = Year, y = `Mean_Infant mortality_Rate/1000`)) +
  geom_bar(
    mapping = aes(fill = Country),
    stat = "identity",
    position = "dodge"
  ) +
  # Tilt the year labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Country and mean infant mortality rate only, with rows ordered by country
# and then by year.
final_df_display <- select(
  arrange(final_df, Country, Year),
  Country,
  `Mean_Infant mortality_Rate/1000`
)
- Uganda seems to have reduced its infant mortality rate by over 50%
Display Of Infant Mortality With The 3 Samples Closer In Scale
# Same infant mortality chart restricted to Ukraine, the United Kingdom and
# the United States.
final_df %>%
  dplyr::filter(
    Country %in% c("United_Kingdom", "Ukraine", "United_States")
  ) %>%
  dplyr::group_by(Country) %>%
  ggplot(mapping = aes(x = Year, y = `Mean_Infant mortality_Rate/1000`)) +
  geom_bar(
    mapping = aes(fill = Country),
    stat = "identity",
    position = "dodge"
  ) +
  # Tilt the year labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

## # A tibble: 56 x 19
## Coun~ Year `Gro~ `Hos~ `Mea~ Min_~ Max_~ `Mea~ `Min~ Max_~ `Num~ `Num~
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Ugan~ 2015 NA NA 202 120 304 39.2 35.4 43.3 38.0 93.0
## 2 Ugan~ 2014 NA NA 202 127 294 41.0 37.8 44.5 38.0 97.0
## 3 Ugan~ 2005 890 NA 233 156 325 74.4 70.3 78.7 38.0 155
## 4 Ugan~ 2004 840 NA 240 169 323 80.5 76.3 85.0 39.0 164
## 5 Ugan~ 2003 800 NA 248 172 336 86.7 82.0 91.4 39.0 173
## 6 Ugan~ 2002 760 NA 256 171 358 92.5 87.5 97.5 40.0 181
## 7 Ugan~ 2013 NA NA 203 134 288 43.9 40.8 47.2 38.0 103
## 8 Ugan~ 2012 1320 NA 205 135 289 46.5 43.2 50.0 38.0 109
## 9 Ugan~ 2011 1310 NA 207 143 283 49.5 46.1 53.3 38.0 114
## 10 Ugan~ 2010 1240 5.00 210 139 296 52.7 49.2 56.4 38.0 119
## # ... with 46 more rows, and 7 more variables: `Poliomyelitis - number of
## # reported cases` <dbl>, Mean_Tuberculosis_Coverage <dbl>,
## # Min_Tuberculosis_Coverage <dbl>, Max_Tuberculosis_Coverage <dbl>,
## # `Mean_Under-five_Mortality_Rate/1000` <dbl>,
## # `Min_Under-five_Mortality_Rate` <dbl>, `Max_Under-five_Mortality_Rate`
## # <dbl>
- Ukraine has seen a reduction of around 40%
- The US and UK have also seen reductions; however, the UK's reduction (about 30%) is more substantial than the US's (about 17%)
Look At Tuberculosis
## # A tibble: 56 x 19
## Coun~ Year `Gro~ `Hos~ `Mea~ Min_~ Max_~ `Mea~ `Min~ Max_~ `Num~ `Num~
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Ugan~ 2015 NA NA 202 120 304 39.2 35.4 43.3 38.0 93.0
## 2 Ugan~ 2014 NA NA 202 127 294 41.0 37.8 44.5 38.0 97.0
## 3 Ugan~ 2005 890 NA 233 156 325 74.4 70.3 78.7 38.0 155
## 4 Ugan~ 2004 840 NA 240 169 323 80.5 76.3 85.0 39.0 164
## 5 Ugan~ 2003 800 NA 248 172 336 86.7 82.0 91.4 39.0 173
## 6 Ugan~ 2002 760 NA 256 171 358 92.5 87.5 97.5 40.0 181
## 7 Ugan~ 2013 NA NA 203 134 288 43.9 40.8 47.2 38.0 103
## 8 Ugan~ 2012 1320 NA 205 135 289 46.5 43.2 50.0 38.0 109
## 9 Ugan~ 2011 1310 NA 207 143 283 49.5 46.1 53.3 38.0 114
## 10 Ugan~ 2010 1240 5.00 210 139 296 52.7 49.2 56.4 38.0 119
## # ... with 46 more rows, and 7 more variables: `Poliomyelitis - number of
## # reported cases` <dbl>, Mean_Tuberculosis_Coverage <dbl>,
## # Min_Tuberculosis_Coverage <dbl>, Max_Tuberculosis_Coverage <dbl>,
## # `Mean_Under-five_Mortality_Rate/1000` <dbl>,
## # `Min_Under-five_Mortality_Rate` <dbl>, `Max_Under-five_Mortality_Rate`
## # <dbl>
# Dodged bar chart of mean tuberculosis incidence by year for the four listed
# countries.
final_df %>%
  dplyr::filter(
    Country %in% c("Uganda", "Ukraine", "United_Kingdom", "United_States")
  ) %>%
  dplyr::group_by(Country) %>%
  ggplot(
    mapping = aes(x = Year, y = `Mean_Incidence_Tuberculosis_100,000`)
  ) +
  geom_bar(
    mapping = aes(fill = Country),
    stat = "identity",
    position = "dodge"
  ) +
  # Tilt the year labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

- Uganda seems to have seen some moderate improvement over the past 13 years in tuberculosis, but the rate is relatively high still
- Ukraine seems to have had a similar improvement