This R Markdown file was created as part of my Week 4 assignment for the Data Wrangling in R course taught at UC. This week I carried out an exploratory data analysis of the gapminder_unfiltered data set from the gapminder package.
I have used the following packages to create this RMD file:
library(printr) # for proper formatting while printing
library(tidyverse) # for creating visualizations
library(gapminder) # for getting the gapminder_unfiltered dataset
## Warning: package 'gapminder' was built under R version 3.3.2
# Open the package documentation for the gapminder data set.
help("gapminder")
| gapminder | R Documentation |
Excerpt of the Gapminder data on life expectancy, GDP per capita, and population by country.
gapminder
The main data frame gapminder has 1704 rows and 6 variables:
country: factor with 142 levels
continent: factor with 5 levels
year: ranges from 1952 to 2007 in increments of 5 years
lifeExp: life expectancy at birth, in years
pop: population
gdpPercap: GDP per capita
The supplemental data frame gapminder_unfiltered was not filtered on year or for complete data and has 3313 rows.
http://www.gapminder.org/data/
country_colors for a nice color scheme for the countries
# Inspect the structure, the first rows and the per-column summary of gapminder.
str(gapminder)
head(gapminder)
summary(gapminder)
# Number of rows per continent.
table(gapminder$continent)
# Median life expectancy for each continent (formula interface of aggregate()).
aggregate(lifeExp ~ continent, gapminder, median)
# Life expectancy over the years for Cambodia only; type = "b" draws both
# points and connecting lines.
plot(lifeExp ~ year, gapminder, subset = country == "Cambodia", type = "b")
# Life expectancy against GDP per capita for the 2007 rows, with the x axis
# on a log scale.
plot(lifeExp ~ gdpPercap, gapminder, subset = year == 2007, log = "x")
# The dplyr-based examples run only when dplyr can be loaded: require()
# returns FALSE (instead of raising an error) when the package is missing.
if (require("dplyr")) {
# median life expectancy per continent, using the 2007 rows only
gapminder %>%
filter(year == 2007) %>%
group_by(continent) %>%
summarise(lifeExp = median(lifeExp))
# how many unique countries does the data contain, by continent?
gapminder %>%
group_by(continent) %>%
summarize(n_obs = n(), n_countries = n_distinct(country))
# by continent, which country experienced the sharpest 5-year drop in
# life expectancy and what was the drop?
gapminder %>%
group_by(continent, country) %>%
select(country, year, continent, lifeExp) %>%
# change versus the previous row of the same country
# (presumably rows are in year order within each country -- confirm)
mutate(le_delta = lifeExp - lag(lifeExp)) %>%
# most negative change per country; the first row's NA delta is ignored
summarize(worst_le_delta = min(le_delta, na.rm = TRUE)) %>%
# summarize() dropped the country grouping, so ranking happens per continent;
# rank < 2 keeps the country (or tied countries) with the largest drop
filter(min_rank(worst_le_delta) < 2) %>%
# order the continents from the sharpest drop to the mildest
arrange(worst_le_delta)
}
# Preview the first six rows of the unfiltered data set.
gapminder_unfiltered %>%
  head(n = 6L)
| country | continent | year | lifeExp | pop | gdpPercap |
|---|---|---|---|---|---|
| Afghanistan | Asia | 1952 | 28.801 | 8425333 | 779.4453 |
| Afghanistan | Asia | 1957 | 30.332 | 9240934 | 820.8530 |
| Afghanistan | Asia | 1962 | 31.997 | 10267083 | 853.1007 |
| Afghanistan | Asia | 1967 | 34.020 | 11537966 | 836.1971 |
| Afghanistan | Asia | 1972 | 36.088 | 13079460 | 739.9811 |
| Afghanistan | Asia | 1977 | 38.438 | 14880372 | 786.1134 |
# Per-column summary (counts for factors, quantiles for numeric columns).
gapminder_unfiltered %>%
  summary()
| country | continent | year | lifeExp | pop | gdpPercap | |
|---|---|---|---|---|---|---|
| Czech Republic: 58 | Africa : 637 | Min. :1950 | Min. :23.60 | Min. :5.941e+04 | Min. : 241.2 | |
| Denmark : 58 | Americas: 470 | 1st Qu.:1967 | 1st Qu.:58.33 | 1st Qu.:2.680e+06 | 1st Qu.: 2505.3 | |
| Finland : 58 | Asia : 578 | Median :1982 | Median :69.61 | Median :7.560e+06 | Median : 7825.8 | |
| Iceland : 58 | Europe :1302 | Mean :1980 | Mean :65.24 | Mean :3.177e+07 | Mean : 11313.8 | |
| Japan : 58 | FSU : 139 | 3rd Qu.:1996 | 3rd Qu.:73.66 | 3rd Qu.:1.961e+07 | 3rd Qu.: 17355.8 | |
| Netherlands : 58 | Oceania : 187 | Max. :2007 | Max. :82.67 | Max. :1.319e+09 | Max. :113523.1 | |
| (Other) :2965 | NA | NA | NA | NA | NA | |
# Keep a working copy of the unfiltered data set.
gapminder_copy <- gapminder_unfiltered

# 2007 snapshot of every country, ordered from highest to lowest GDP per capita.
gapminder_2007 <- arrange(
  filter(gapminder_copy, year == 2007),
  desc(gdpPercap)
)

# Print only the country name next to its GDP per capita.
gapminder_2007 %>%
  select(country, gdpPercap)
| country | gdpPercap |
|---|---|
| Qatar | 82010.9780 |
| Macao, China | 54589.8202 |
| Norway | 49357.1902 |
| Brunei | 48014.5877 |
| Kuwait | 47306.9898 |
| Singapore | 47143.1796 |
| United States | 42951.6531 |
| Ireland | 40675.9964 |
| Hong Kong, China | 39724.9787 |
| Switzerland | 37506.4191 |
| United Arab Emirates | 36953.6837 |
| Netherlands | 36797.9333 |
| Canada | 36319.2350 |
| Iceland | 36180.7892 |
| Austria | 36126.4927 |
| Denmark | 35278.4187 |
| Australia | 34435.3674 |
| Sweden | 33859.7484 |
| Belgium | 33692.6051 |
| Finland | 33207.0844 |
| United Kingdom | 33203.2613 |
| Germany | 32170.3744 |
| Japan | 31656.0681 |
| New Caledonia | 31278.3450 |
| France | 30470.0167 |
| Bahrain | 29796.0483 |
| Spain | 28821.0637 |
| Taiwan | 28718.2768 |
| Italy | 28569.7197 |
| Greece | 27538.4119 |
| Aruba | 27230.6752 |
| French Polynesia | 26766.8449 |
| Cyprus | 26002.8720 |
| Slovenia | 25768.2576 |
| Israel | 25523.2771 |
| New Zealand | 25185.0091 |
| Bahamas | 23948.6375 |
| Korea, Rep. | 23348.1397 |
| Czech Republic | 22833.3085 |
| Netherlands Antilles | 22500.0010 |
| Oman | 22316.1929 |
| Malta | 21711.7269 |
| Saudi Arabia | 21654.8319 |
| Portugal | 20509.6478 |
| Estonia | 19967.4026 |
| Puerto Rico | 19328.7090 |
| Slovak Republic | 18678.3144 |
| Hungary | 18008.9444 |
| Trinidad and Tobago | 18008.5092 |
| Barbados | 17023.1024 |
| Lithuania | 16666.5087 |
| Latvia | 16539.4535 |
| Poland | 15389.9247 |
| Croatia | 14619.2227 |
| Russia | 13820.7114 |
| Gabon | 13206.4845 |
| Chile | 13171.6388 |
| Argentina | 12779.3796 |
| Botswana | 12569.8518 |
| Malaysia | 12451.6558 |
| Equatorial Guinea | 12154.0897 |
| Libya | 12057.4993 |
| Mexico | 11977.5750 |
| Iran | 11605.7145 |
| Venezuela | 11415.8057 |
| Mauritius | 10956.9911 |
| Romania | 10808.4756 |
| Bulgaria | 10680.7928 |
| Uruguay | 10611.4630 |
| Lebanon | 10461.0587 |
| Kazakhstan | 10312.7714 |
| Belarus | 10273.7742 |
| Panama | 9809.1856 |
| Serbia | 9786.5347 |
| Costa Rica | 9645.0614 |
| South Africa | 9269.6578 |
| Montenegro | 9253.8961 |
| Grenada | 9148.0080 |
| Brazil | 9065.8008 |
| Cuba | 8948.1029 |
| Turkey | 8458.2764 |
| Suriname | 7987.2714 |
| Azerbaijan | 7708.6112 |
| Reunion | 7670.1226 |
| Belize | 7550.2255 |
| Thailand | 7458.3963 |
| Bosnia and Herzegovina | 7446.2988 |
| Peru | 7408.9056 |
| Jamaica | 7320.8803 |
| Tunisia | 7092.9230 |
| Colombia | 7006.5804 |
| Ecuador | 6873.2623 |
| Ukraine | 6549.3718 |
| Algeria | 6223.3675 |
| Dominican Republic | 6025.3748 |
| Albania | 5937.0295 |
| El Salvador | 5728.3535 |
| Egypt | 5581.1810 |
| Micronesia, Fed. Sts. | 5422.1290 |
| Guatemala | 5186.0500 |
| Maldives | 5166.8217 |
| Samoa | 5143.6155 |
| Tonga | 5063.9758 |
| China | 4959.1149 |
| Armenia | 4942.5439 |
| Turkmenistan | 4889.2498 |
| Namibia | 4811.0604 |
| Angola | 4797.2313 |
| Bhutan | 4744.6400 |
| Jordan | 4519.4612 |
| Swaziland | 4513.4806 |
| Iraq | 4471.0619 |
| Georgia | 4386.5604 |
| Syria | 4184.5481 |
| Paraguay | 4172.8385 |
| Fiji | 4118.1583 |
| Sri Lanka | 3970.0954 |
| Bolivia | 3822.1371 |
| Morocco | 3820.1752 |
| Vanuatu | 3656.9072 |
| Congo, Rep. | 3632.5578 |
| Guyana | 3586.0426 |
| Honduras | 3548.3308 |
| Indonesia | 3540.6516 |
| Cape Verde | 3205.4794 |
| Philippines | 3190.4810 |
| Mongolia | 3095.7723 |
| West Bank and Gaza | 3025.3498 |
| Nicaragua | 2749.3210 |
| Pakistan | 2605.9476 |
| Moldova | 2604.7505 |
| Sudan | 2602.3950 |
| India | 2452.2104 |
| Vietnam | 2441.5764 |
| Timor-Leste | 2285.7380 |
| Yemen, Rep. | 2280.7699 |
| Uzbekistan | 2211.1589 |
| Djibouti | 2082.4816 |
| Cameroon | 2042.0952 |
| Nigeria | 2013.9773 |
| Solomon Islands | 1829.3174 |
| Papua New Guinea | 1827.0966 |
| Mauritania | 1803.1515 |
| Cambodia | 1713.7787 |
| Senegal | 1712.4721 |
| Chad | 1704.0637 |
| Sao Tome and Principe | 1598.4351 |
| Korea, Dem. Rep. | 1593.0655 |
| Lesotho | 1569.3314 |
| Tajikistan | 1562.4479 |
| Cote d’Ivoire | 1544.7501 |
| Kenya | 1463.2493 |
| Benin | 1441.2849 |
| Bangladesh | 1391.2538 |
| Ghana | 1327.6089 |
| Zambia | 1271.2116 |
| Burkina Faso | 1217.0330 |
| Haiti | 1201.6372 |
| Tanzania | 1107.4822 |
| Nepal | 1091.3598 |
| Uganda | 1056.3801 |
| Madagascar | 1044.7701 |
| Mali | 1042.5816 |
| Comoros | 986.1479 |
| Afghanistan | 974.5803 |
| Myanmar | 944.0000 |
| Guinea | 942.6542 |
| Somalia | 926.1411 |
| Togo | 882.9699 |
| Rwanda | 863.0885 |
| Sierra Leone | 862.5408 |
| Mozambique | 823.6856 |
| Malawi | 759.3499 |
| Gambia | 752.7497 |
| Central African Republic | 706.0165 |
| Ethiopia | 690.8056 |
| Eritrea | 641.3695 |
| Niger | 619.6769 |
| Guinea-Bissau | 579.2317 |
| Zimbabwe | 469.7093 |
| Burundi | 430.0707 |
| Liberia | 414.5073 |
| Congo, Dem. Rep. | 277.5519 |
# Horizontal bar chart of 2007 GDP per capita, one bar per country.
# coord_flip() puts the country names on the vertical axis.
gapminder_2007 %>%
  ggplot(aes(x = country, y = gdpPercap)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip()
# Mean GDP per capita within each continent, computed from the rows of
# gapminder_2007; missing values are ignored.
continent_data <- summarise(
  group_by(gapminder_2007, continent),
  gdppercap_continent = mean(gdpPercap, na.rm = TRUE)
)

# Print the continent averages.
continent_data %>%
  select(continent, gdppercap_continent)
| continent | gdppercap_continent |
|---|---|
| Africa | 3091.230 |
| Americas | 11940.902 |
| Asia | 15338.057 |
| Europe | 24174.153 |
| FSU | 9522.539 |
| Oceania | 13156.979 |
# Bar chart of the per-continent mean GDP per capita.
continent_data %>%
  ggplot(aes(x = continent, y = gdppercap_continent)) +
  geom_bar(stat = "identity")
# Ten rows of gapminder_2007 with the highest GDP per capita, largest first.
gapminder_2007 %>%
  arrange(desc(gdpPercap)) %>%
  select(country, gdpPercap) %>%
  head(n = 10)
| country | gdpPercap |
|---|---|
| Qatar | 82010.98 |
| Macao, China | 54589.82 |
| Norway | 49357.19 |
| Brunei | 48014.59 |
| Kuwait | 47306.99 |
| Singapore | 47143.18 |
| United States | 42951.65 |
| Ireland | 40676.00 |
| Hong Kong, China | 39724.98 |
| Switzerland | 37506.42 |
Country = India
# All available observations for India, in chronological order.
gapminder_india <- arrange(
  filter(gapminder_copy, country == "India"),
  year
)
# Line chart of India's GDP per capita over the years.
gapminder_india %>%
  ggplot(aes(x = year, y = gdpPercap)) +
  geom_line()
# Mean GDP-per-capita growth (in percent) reported for 2007 across countries.
#
# Growth is a change over time within one country, so lag() must run on each
# country's own year-ordered history. Running it on the 2007 snapshot (one row
# per country, sorted by descending gdpPercap) would instead measure the
# percentage gap between neighbouring countries in the ranking.
#
# Growth is relative to each country's previous available observation; a
# country whose first observation is 2007 yields NA and is dropped by na.rm.
# NOTE(review): the spacing between observations in gapminder_unfiltered may
# differ by country, so the averaged figures may span different numbers of
# years -- confirm this is acceptable.
# NOTE: as before, the result (one row, column gdpGrowth) is stored in
# gapminder_2007 and therefore replaces the 2007 snapshot.
gapminder_2007 <- gapminder_copy %>%
  arrange(country, year) %>%
  group_by(country) %>%
  mutate(gdpgrowth = (gdpPercap - lag(gdpPercap)) / lag(gdpPercap) * 100) %>%
  ungroup() %>%
  filter(year == 2007) %>%
  summarise(gdpGrowth = mean(gdpgrowth, na.rm = TRUE))
# Percentage change in GDP per capita relative to the previous row of
# gapminder_india (assumes the rows are ordered by year); the first row has
# no predecessor, so its growth is NA.
gapminder_india <- mutate(
  gapminder_india,
  gdpgrowth = (gdpPercap - lag(gdpPercap)) / lag(gdpPercap) * 100
)

# Historical growth: mean of the per-period growth rates stored above,
# skipping the leading NA.
summarise(gapminder_india, histGrowth = mean(gdpgrowth, na.rm = TRUE))
15.055
# Line chart of the gdpgrowth column of gapminder_india over the years.
gapminder_india %>%
  ggplot(aes(x = year, y = gdpgrowth)) +
  geom_line()