Synopsis

This is my report for the week 4 assignment on data exploration: learning about the data and visualising it. The dataset being analysed contains data on life expectancy, GDP per capita and population by country.

Packages Required

The tidyverse is a collection of packages that can be used for data manipulation and visualisation; printr improves how results are printed in the report, and gapminder provides the dataset.

library(tidyverse)
library(printr)
library(gapminder)

Source Code

The gapminder_unfiltered dataset contains data on life expectancy, GDP per capita and population by country. Its variables are:
- country: names of 187 countries
- continent: names of 6 continents (Africa, Americas, Asia, Europe, FSU, Oceania)
- year: ranges from 1950 to 2007; the number of recorded years varies by country
- lifeExp: life expectancy at birth, in years
- pop: population
- gdpPercap: GDP per capita

Data Description

The number of rows and columns

ncol(gapminder_unfiltered)
## [1] 6
nrow(gapminder_unfiltered)
## [1] 3313

Names of Variables and their data type

names(gapminder_unfiltered)
## [1] "country"   "continent" "year"      "lifeExp"   "pop"       "gdpPercap"
str(gapminder_unfiltered)
## Classes 'tbl_df', 'tbl' and 'data.frame':    3313 obs. of  6 variables:
##  $ country  : Factor w/ 187 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ continent: Factor w/ 6 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ year     : int  1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
##  $ lifeExp  : num  28.8 30.3 32 34 36.1 ...
##  $ pop      : int  8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
##  $ gdpPercap: num  779 821 853 836 740 ...

A sneak peek into the dataset

head(gapminder_unfiltered)
country continent year lifeExp pop gdpPercap
Afghanistan Asia 1952 28.801 8425333 779.4453
Afghanistan Asia 1957 30.332 9240934 820.8530
Afghanistan Asia 1962 31.997 10267083 853.1007
Afghanistan Asia 1967 34.020 11537966 836.1971
Afghanistan Asia 1972 36.088 13079460 739.9811
Afghanistan Asia 1977 38.438 14880372 786.1134
tail(gapminder_unfiltered)
country continent year lifeExp pop gdpPercap
Zimbabwe Africa 1982 60.363 7636524 788.8550
Zimbabwe Africa 1987 62.351 9216418 706.1573
Zimbabwe Africa 1992 60.377 10704340 693.4208
Zimbabwe Africa 1997 46.809 11404948 792.4500
Zimbabwe Africa 2002 39.989 11926563 672.0386
Zimbabwe Africa 2007 43.487 12311143 469.7093
Checking for missing values
sum(is.na(gapminder_unfiltered))
## [1] 0

Basic Statistics

summary(gapminder_unfiltered)
country continent year lifeExp pop gdpPercap
Czech Republic: 58 Africa : 637 Min. :1950 Min. :23.60 Min. :5.941e+04 Min. : 241.2
Denmark : 58 Americas: 470 1st Qu.:1967 1st Qu.:58.33 1st Qu.:2.680e+06 1st Qu.: 2505.3
Finland : 58 Asia : 578 Median :1982 Median :69.61 Median :7.560e+06 Median : 7825.8
Iceland : 58 Europe :1302 Mean :1980 Mean :65.24 Mean :3.177e+07 Mean : 11313.8
Japan : 58 FSU : 139 3rd Qu.:1996 3rd Qu.:73.66 3rd Qu.:1.961e+07 3rd Qu.: 17355.8
Netherlands : 58 Oceania : 187 Max. :2007 Max. :82.67 Max. :1.319e+09 Max. :113523.1
(Other) :2965 NA NA NA NA NA

Exploratory Data Analysis

1.For the year 2007, what is the distribution of GDP per capita across all countries?

# Keep only the observations recorded for 2007.
gapminder_filtered <- filter(gapminder_unfiltered, year == 2007)

# The question asks for the *distribution* of GDP per capita, so plot a
# histogram of the values. One bar per country only lists the individual
# values and leaves the x-axis unreadable with this many countries.
ggplot(gapminder_filtered, aes(x = gdpPercap)) +
  geom_histogram(bins = 30) +
  labs(x = "GDP per capita (2007)", y = "Number of countries")

2.For the year 2007, how do the distributions differ across the different continents?

# Mean GDP per capita for every (year, continent) pair. Naming the entries of
# the grouping list gives the key columns their final names directly.
gap_unf <- with(
  gapminder_unfiltered,
  aggregate(
    x = gdpPercap,
    by = list(Year = year, continent = continent),
    FUN = mean
  )
)
# aggregate() labels the summarised column "x"; give it its real name.
names(gap_unf)[names(gap_unf) == "x"] <- "gdpPercap"

# Keep the 2007 averages and draw one bar per continent.
gap_unf_2007 <- gap_unf %>% filter(Year == 2007)
ggplot(gap_unf_2007, aes(x = continent, y = gdpPercap)) +
  geom_col()


3.For the year 2007, what are the top 10 countries with the largest GDP per capita?

# 2007 observations, ranked from highest to lowest GDP per capita.
gapminder_filtered <- gapminder_unfiltered %>% filter(year == 2007)
sortedgap <- gapminder_filtered %>% arrange(desc(gdpPercap))

# Take the ten highest rows and fix the factor levels to the ranked order so
# that ggplot draws the bars by rank rather than alphabetically.
top10 <- sortedgap %>%
  head(n = 10) %>%
  mutate(country = factor(country, levels = country))

ggplot(top10, aes(x = country, y = gdpPercap)) +
  geom_col()


4.Plot the GDP per capita for your country of origin for all years available.

# All observations for India; year becomes a factor (levels in row order) so
# each recorded year gets its own discrete bar on the x-axis.
gapminder_filtered <- gapminder_unfiltered %>%
  filter(country == "India") %>%
  mutate(year = factor(year, levels = year))

ggplot(gapminder_filtered, aes(x = year, y = gdpPercap)) +
  geom_col()


5.What was the percent growth (or decline) in GDP per capita in 2007?

# Growth is measured against 2002 (not 2006), so the figure below is the
# percentage change over the five-year period 2002-2007, not a one-year rate.
# The baseline data frame is named after the year it actually contains.
gapminder_2007 <- filter(gapminder_unfiltered, year == 2007)
gapminder_2002 <- filter(gapminder_unfiltered, year == 2002)

# merge() keeps only countries present in both years. Explicit suffixes make
# it clear which year each duplicated column comes from.
combined <- merge(
  gapminder_2007, gapminder_2002,
  by = "country", suffixes = c(".2007", ".2002")
)

# Percentage change in GDP per capita relative to the 2002 value.
combined$gdp.growth <-
  (combined$gdpPercap.2007 - combined$gdpPercap.2002) /
  combined$gdpPercap.2002 * 100

ggplot(combined, aes(x = country, y = gdp.growth)) +
  geom_col() +
  labs(y = "GDP per capita growth, 2002 to 2007 (%)")


6.What has been the historical growth (or decline) in GDP per capita for your country?

# Period-over-period percentage growth in GDP per capita for India.
# lag() compares each row with the row before it, so the rows are sorted by
# year explicitly instead of relying on the dataset's existing order.
gapminder_India <- gapminder_unfiltered %>%
  filter(country == "India") %>%
  arrange(year) %>%
  mutate(
    growth = (gdpPercap - lag(gdpPercap)) / lag(gdpPercap) * 100,
    # Discrete x-axis: one bar per recorded year, in chronological order.
    year = factor(year)
  ) %>%
  # The earliest year has no previous observation, so its growth is NA.
  filter(!is.na(growth))

ggplot(gapminder_India, aes(x = year, y = growth)) +
  geom_col()