The purpose of this markdown file is to explore the built-in data of the gapminder package: to examine the distribution of a variable, run various other queries on it, plot graphs, and calculate the growth or decline of a variable for a particular year or country.
# Packages used throughout this analysis.
library(gapminder) # supplies the built-in gapminder data sets
library(dplyr) # data verbs such as filter() and arrange()
library(ggplot2) # histograms and faceted plots
# Open the help page for the unfiltered data set.
?gapminder_unfiltered
# The supplemental data frame gapminder_unfiltered has 3313 rows and 6 variables and is not filtered on year:
# country
# factor with 187 levels
# continent
# factor with 6 levels
# year
# ranges from 1950 to 2007; not every country has data for every year
# lifeExp
# life expectancy at birth, in years
# pop
# population
# gdpPercap
# GDP per capita
# Dimensions of the unfiltered data: row count first, then column count.
dim(gapminder_unfiltered)[1L]
## [1] 3313
dim(gapminder_unfiltered)[2L]
## [1] 6
# Per-column summary statistics for the whole data frame.
summary(gapminder_unfiltered)
## country continent year lifeExp
## Czech Republic: 58 Africa : 637 Min. :1950 Min. :23.60
## Denmark : 58 Americas: 470 1st Qu.:1967 1st Qu.:58.33
## Finland : 58 Asia : 578 Median :1982 Median :69.61
## Iceland : 58 Europe :1302 Mean :1980 Mean :65.24
## Japan : 58 FSU : 139 3rd Qu.:1996 3rd Qu.:73.66
## Netherlands : 58 Oceania : 187 Max. :2007 Max. :82.67
## (Other) :2965
## pop gdpPercap
## Min. :5.941e+04 Min. : 241.2
## 1st Qu.:2.680e+06 1st Qu.: 2505.3
## Median :7.560e+06 Median : 7825.8
## Mean :3.177e+07 Mean : 11313.8
## 3rd Qu.:1.961e+07 3rd Qu.: 17355.8
## Max. :1.319e+09 Max. :113523.1
##
# Count missing (NA) cells across the whole data frame; zero means none.
gapminder_unfiltered %>%
  is.na() %>%
  sum()
## [1] 0
# Rows recorded per year; the counts show that coverage is uneven across years.
table(gapminder_unfiltered[["year"]])
##
## 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964
## 39 24 144 24 24 24 24 144 25 25 26 26 151 26 26
## 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979
## 27 27 156 27 27 27 27 168 32 27 27 27 171 27 27
## 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994
## 27 27 171 27 27 27 27 171 27 27 32 33 183 33 33
## 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007
## 33 33 184 33 33 33 33 187 33 32 30 18 183
# Rows recorded per country; not every country has data for every year.
table(gapminder_unfiltered[["country"]])
##
## Afghanistan Albania Algeria
## 12 12 12
## Angola Argentina Armenia
## 12 12 4
## Aruba Australia Austria
## 8 56 57
## Azerbaijan Bahamas Bahrain
## 4 10 12
## Bangladesh Barbados Belarus
## 12 10 18
## Belgium Belize Benin
## 57 11 12
## Bhutan Bolivia Bosnia and Herzegovina
## 8 12 12
## Botswana Brazil Brunei
## 12 12 8
## Bulgaria Burkina Faso Burundi
## 57 12 12
## Cambodia Cameroon Canada
## 12 12 57
## Cape Verde Central African Republic Chad
## 11 12 12
## Chile China Colombia
## 12 36 12
## Comoros Congo, Dem. Rep. Congo, Rep.
## 12 12 12
## Costa Rica Cote d'Ivoire Croatia
## 13 12 12
## Cuba Cyprus Czech Republic
## 13 8 58
## Denmark Djibouti Dominican Republic
## 58 12 12
## Ecuador Egypt El Salvador
## 12 12 12
## Equatorial Guinea Eritrea Estonia
## 12 12 18
## Ethiopia Fiji Finland
## 12 10 58
## France French Guiana French Polynesia
## 57 1 9
## Gabon Gambia Georgia
## 12 12 9
## Germany Ghana Greece
## 26 12 13
## Grenada Guadeloupe Guatemala
## 8 1 12
## Guinea Guinea-Bissau Guyana
## 12 12 10
## Haiti Honduras Hong Kong, China
## 12 12 12
## Hungary Iceland India
## 57 58 12
## Indonesia Iran Iraq
## 12 12 12
## Ireland Israel Italy
## 13 12 56
## Jamaica Japan Jordan
## 12 58 12
## Kazakhstan Kenya Korea, Dem. Rep.
## 4 12 12
## Korea, Rep. Kuwait Latvia
## 12 12 42
## Lebanon Lesotho Liberia
## 12 12 12
## Libya Lithuania Luxembourg
## 13 18 49
## Macao, China Madagascar Malawi
## 8 12 12
## Malaysia Maldives Mali
## 12 8 12
## Malta Martinique Mauritania
## 10 1 12
## Mauritius Mexico Micronesia, Fed. Sts.
## 12 13 8
## Moldova Mongolia Montenegro
## 5 12 12
## Morocco Mozambique Myanmar
## 12 12 12
## Namibia Nepal Netherlands
## 12 12 58
## Netherlands Antilles New Caledonia New Zealand
## 8 9 55
## Nicaragua Niger Nigeria
## 12 12 12
## Norway Oman Pakistan
## 58 12 12
## Panama Papua New Guinea Paraguay
## 12 10 12
## Peru Philippines Poland
## 12 12 52
## Portugal Puerto Rico Qatar
## 58 13 8
## Reunion Romania Russia
## 12 12 20
## Rwanda Samoa Sao Tome and Principe
## 12 7 12
## Saudi Arabia Senegal Serbia
## 12 12 12
## Sierra Leone Singapore Slovak Republic
## 12 12 58
## Slovenia Solomon Islands Somalia
## 32 9 12
## South Africa Spain Sri Lanka
## 12 58 13
## Sudan Suriname Swaziland
## 12 8 12
## Sweden Switzerland Syria
## 58 58 12
## Taiwan Tajikistan Tanzania
## 58 4 12
## Thailand Timor-Leste Togo
## 13 4 12
## Tonga Trinidad and Tobago Tunisia
## 7 12 12
## Turkey Turkmenistan Uganda
## 12 4 13
## Ukraine United Arab Emirates United Kingdom
## 20 8 13
## United States Uruguay Uzbekistan
## 57 12 4
## Vanuatu Venezuela Vietnam
## 7 12 12
## West Bank and Gaza Yemen, Rep. Zambia
## 12 12 12
## Zimbabwe
## 12
# Distinct continent values present in the data.
unique(gapminder_unfiltered[["continent"]])
## [1] Asia Europe Africa Americas FSU Oceania
## Levels: Africa Americas Asia Europe FSU Oceania
### 1. For the year 2007, what is the distribution of GDP per capita across all countries?
# Keep only the 2007 observations; later questions reuse this subset.
data_2007 <- filter(gapminder_unfiltered, year == 2007)
# Show the subset.
data_2007
## # A tibble: 183 × 6
## country continent year lifeExp pop gdpPercap
## <fctr> <fctr> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 2007 43.828 31889923 974.5803
## 2 Albania Europe 2007 76.423 3600523 5937.0295
## 3 Algeria Africa 2007 72.301 33333216 6223.3675
## 4 Angola Africa 2007 42.731 12420476 4797.2313
## 5 Argentina Americas 2007 75.320 40301927 12779.3796
## 6 Armenia FSU 2007 71.965 2971650 4942.5439
## 7 Aruba Americas 2007 74.239 72194 27230.6752
## 8 Australia Oceania 2007 81.235 20434176 34435.3674
## 9 Austria Europe 2007 79.829 8199783 36126.4927
## 10 Azerbaijan Asia 2007 67.487 8017309 7708.6112
## # ... with 173 more rows
# Histogram of 2007 GDP per capita across all countries, using 100 bins.
ggplot(data = data_2007) +
  geom_histogram(
    mapping = aes(x = gdpPercap),
    bins = 100
  )
### 2. For the year 2007, how do the distributions differ across the different continents?
# Histogram of 2007 GDP per capita, one panel per continent (two rows of panels).
# `bins` is set explicitly to ggplot2's default of 30: the plot is unchanged,
# but the "`stat_bin()` using `bins = 30`" message is no longer emitted and the
# bin choice is visible to the reader.
ggplot(data = data_2007) +
  geom_histogram(mapping = aes(x = gdpPercap), bins = 30) +
  facet_wrap(~ continent, nrow = 2)
### 3. For the year 2007, what are the top 10 countries with the largest GDP per capita?
# Sort the 2007 rows by GDP per capita, highest first, and keep the first ten.
data_2007 %>%
  arrange(desc(gdpPercap)) %>%
  head(10)
## # A tibble: 10 × 6
## country continent year lifeExp pop gdpPercap
## <fctr> <fctr> <int> <dbl> <int> <dbl>
## 1 Qatar Asia 2007 75.588 907229 82010.98
## 2 Macao, China Asia 2007 80.718 456989 54589.82
## 3 Norway Europe 2007 80.196 4627926 49357.19
## 4 Brunei Asia 2007 77.118 386511 48014.59
## 5 Kuwait Asia 2007 77.588 2505559 47306.99
## 6 Singapore Asia 2007 79.972 4553009 47143.18
## 7 United States Americas 2007 78.242 301139947 42951.65
## 8 Ireland Europe 2007 78.885 4109086 40676.00
## 9 Hong Kong, China Asia 2007 82.208 6980412 39724.98
## 10 Switzerland Europe 2007 81.701 7554661 37506.42
### 4. Plot the GDP per capita for your country of origin for all years available.
# All available observations for India.
data_India <- gapminder_unfiltered %>%
  filter(country == "India")
# Scatter plot of GDP per capita against year.
ggplot(data = data_India) +
  geom_point(aes(x = year, y = gdpPercap))
### 5. What was the percent growth (or decline) in GDP per capita in 2007?
# Percent change in India's GDP per capita for the 2007 observation.
# lag() compares each row with the previous *available* row after sorting by
# year, so the change covers whatever gap separates the two observations.
# NOTE(review): India has only 12 rows, so the previous row is presumably 2002
# rather than 2006 - confirm before reading this as a one-year growth rate.
gapminder_unfiltered %>%
  filter(country == "India") %>%
  arrange(year) %>%
  mutate(
    previous = lag(gdpPercap),
    change = (gdpPercap - previous) / previous * 100
  ) %>%
  filter(year == 2007) %>%
  select(year, change)
## # A tibble: 1 × 2
## year change
## <int> <dbl>
## 1 2007 40.38546
### What has been the historical growth (or decline) in GDP per capita for your country?
# Absolute change in India's GDP per capita between its earliest and latest
# observations (latest minus earliest, after sorting by year).
gapminder_unfiltered %>%
  filter(country == "India") %>%
  arrange(year) %>%
  summarize(abc = last(gdpPercap) - first(gdpPercap))
## # A tibble: 1 × 1
## abc
## <dbl>
## 1 1905.645