#Import packages & dataset
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Read the gapminder CSV (comma-separated, first row holds the column names;
# these are the read.csv() defaults).
# NOTE(review): the path is absolute and machine-specific.
gapminder <- read.csv(
  file = "C:/Users/User/Downloads/GAPMINDER DATASET/gapminder_full.csv"
)
# Preview the first 10 rows
head(gapminder, n = 10)
## country year population continent life_exp gdp_cap
## 1 Afghanistan 1952 8425333 Asia 28.801 779.4453
## 2 Afghanistan 1957 9240934 Asia 30.332 820.8530
## 3 Afghanistan 1962 10267083 Asia 31.997 853.1007
## 4 Afghanistan 1967 11537966 Asia 34.020 836.1971
## 5 Afghanistan 1972 13079460 Asia 36.088 739.9811
## 6 Afghanistan 1977 14880372 Asia 38.438 786.1134
## 7 Afghanistan 1982 12881816 Asia 39.854 978.0114
## 8 Afghanistan 1987 13867957 Asia 40.822 852.3959
## 9 Afghanistan 1992 16317921 Asia 41.674 649.3414
## 10 Afghanistan 1997 22227415 Asia 41.763 635.3414
# Subset of gapminder restricted to the rows where year is 1957
gapminder_1957 <- filter(gapminder, year == 1957)
# Scatter plot (1957): population on the x-axis, life expectancy on the y-axis
ggplot(data = gapminder_1957, mapping = aes(x = population, y = life_exp)) +
  geom_point()
# Scatter plot (1957): population on the x-axis, GDP per capita on the y-axis
ggplot(data = gapminder_1957, mapping = aes(x = population, y = gdp_cap)) +
  geom_point()
# Scatter plot (1957): GDP per capita on the x-axis, life expectancy on the
# y-axis
ggplot(data = gapminder_1957, mapping = aes(x = gdp_cap, y = life_exp)) +
  geom_point()
# Adding a log scale
# Scatter plot of population on the x-axis (log10 scale) and life expectancy
# on the y-axis
ggplot(data = gapminder_1957, mapping = aes(x = population, y = life_exp)) +
  geom_point() +
  scale_x_log10()
Population increases as life expectancy increases.
# Scatter plot of population on the x-axis and GDP per capita on the y-axis,
# with both axes on a log10 scale
ggplot(data = gapminder_1957, mapping = aes(x = population, y = gdp_cap)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10()
Population rises with an increase in GDP per capita.
# Using the color aesthetic
# Scatter plot of population on the x-axis (log10 scale) and life expectancy
# on the y-axis, with color representing the continent
ggplot(
  data = gapminder_1957,
  mapping = aes(x = population, y = life_exp, color = continent)
) +
  geom_point() +
  scale_x_log10()
Africans have lower life expectancy. Europeans have higher life
expectancy.
# Using the color and size aesthetics
# Scatter plot of population on the x-axis (log10 scale) and life expectancy
# on the y-axis, with color representing the continent and point size
# representing the GDP per capita
ggplot(
  data = gapminder_1957,
  mapping = aes(
    x = population,
    y = life_exp,
    color = continent,
    size = gdp_cap
  )
) +
  geom_point() +
  scale_x_log10()
Africans had lower life expectancy with smaller gdp_cap. Asians had
bigger gdp_cap and a moderate life expectancy.
# Using faceting
# Scatter plot of population on the x-axis (log10 scale) against life
# expectancy on the y-axis, with one panel per continent
ggplot(data = gapminder_1957, mapping = aes(x = population, y = life_exp)) +
  geom_point() +
  scale_x_log10() +
  facet_wrap(~ continent)
# Scatter plot over the full dataset: GDP per capita on the x-axis (log10
# scale), life expectancy on the y-axis, color representing the continent,
# point size representing the population, with one panel per year
ggplot(
  data = gapminder,
  mapping = aes(
    x = gdp_cap,
    y = life_exp,
    color = continent,
    size = population
  )
) +
  geom_point() +
  scale_x_log10() +
  facet_wrap(~ year)
Life expectancy and gdp_cap increase globally over the years. Asians
tend to have bigger gdp_cap.
# VISUALIZING WITH SUMMARIZED DATA
# by_year holds the median life expectancy for each year
by_year <- gapminder %>%
  group_by(year) %>%
  summarise(medianLifeExp = median(life_exp))
head(by_year, n = 10)
## # A tibble: 10 × 2
## year medianLifeExp
## <int> <dbl>
## 1 1952 45.1
## 2 1957 48.4
## 3 1962 50.9
## 4 1967 53.8
## 5 1972 56.5
## 6 1977 59.7
## 7 1982 62.4
## 8 1987 65.8
## 9 1992 67.7
## 10 1997 69.4
# Scatter plot of the change in median life expectancy over time, using the
# by_year data. expand_limits(y = 0) is added because the y-axis would
# otherwise not start from zero.
ggplot(data = by_year, mapping = aes(x = year, y = medianLifeExp)) +
  geom_point() +
  expand_limits(y = 0)
# by_year_continent holds the median GDP per capita for each year/continent
# pair. The result remains grouped by year, as the summarise() message below
# reports.
by_year_continent <- gapminder %>%
  group_by(year, continent) %>%
  summarise(medianGdpPercap = median(gdp_cap))
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
head(by_year_continent, n = 10)
## # A tibble: 10 × 3
## # Groups: year [2]
## year continent medianGdpPercap
## <int> <chr> <dbl>
## 1 1952 Africa 987.
## 2 1952 Americas 3048.
## 3 1952 Asia 1207.
## 4 1952 Europe 5142.
## 5 1952 Oceania 10298.
## 6 1957 Africa 1024.
## 7 1957 Americas 3781.
## 8 1957 Asia 1548.
## 9 1957 Europe 6067.
## 10 1957 Oceania 11599.
# Scatter plot of the median GDP per capita over time for each continent,
# using the by_year_continent data. expand_limits(y = 0) is added because the
# y-axis would otherwise not start from the origin.
ggplot(
  data = by_year_continent,
  mapping = aes(x = year, y = medianGdpPercap, color = continent)
) +
  geom_point() +
  expand_limits(y = 0)
The median gdp_cap increased over the years, though Africa had the
lowest values while Oceania had the highest.
# Median GDP per capita and median life expectancy for each continent in 2007
by_continent_2007 <- gapminder %>%
  filter(year == 2007) %>%
  group_by(continent) %>%
  summarise(
    medianGdpPercap = median(gdp_cap),
    medianLifeExp = median(life_exp)
  )
by_continent_2007
## # A tibble: 5 × 3
## continent medianGdpPercap medianLifeExp
## <chr> <dbl> <dbl>
## 1 Africa 1452. 52.9
## 2 Americas 8948. 72.9
## 3 Asia 4471. 72.4
## 4 Europe 28054. 78.6
## 5 Oceania 29810. 80.7
# Scatter plot of the by_continent_2007 data: median GDP per capita on the
# x-axis, median life expectancy on the y-axis (extended to include zero),
# colored by continent
ggplot(
  data = by_continent_2007,
  mapping = aes(x = medianGdpPercap, y = medianLifeExp, color = continent)
) +
  geom_point() +
  expand_limits(y = 0)
# Creating Line Plots
# by_year_Gdp holds the median GDP per capita for each year
by_year_Gdp <- gapminder %>%
  group_by(year) %>%
  summarise(medianGdpPercap = median(gdp_cap))
head(by_year_Gdp, n = 10)
## # A tibble: 10 × 2
## year medianGdpPercap
## <int> <dbl>
## 1 1952 1969.
## 2 1957 2173.
## 3 1962 2335.
## 4 1967 2678.
## 5 1972 3339.
## 6 1977 3799.
## 7 1982 4216.
## 8 1987 4280.
## 9 1992 4386.
## 10 1997 4782.
# Line plot of the median GDP per capita by year, with the y-axis extended to
# include zero
ggplot(data = by_year_Gdp, mapping = aes(x = year, y = medianGdpPercap)) +
  geom_line() +
  expand_limits(y = 0)
# Median GDP per capita for each year/continent pair (the same pipeline that
# builds by_year_continent). The result remains grouped by year, as the
# summarise() message below reports.
by_year_continent_Gdp <- gapminder %>%
  group_by(year, continent) %>%
  summarise(medianGdpPercap = median(gdp_cap))
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
head(by_year_continent_Gdp, n = 10)
## # A tibble: 10 × 3
## # Groups: year [2]
## year continent medianGdpPercap
## <int> <chr> <dbl>
## 1 1952 Africa 987.
## 2 1952 Americas 3048.
## 3 1952 Asia 1207.
## 4 1952 Europe 5142.
## 5 1952 Oceania 10298.
## 6 1957 Africa 1024.
## 7 1957 Americas 3781.
## 8 1957 Asia 1548.
## 9 1957 Europe 6067.
## 10 1957 Oceania 11599.
# Line plot of the by_year_continent_Gdp data: one line per continent, with
# the y-axis extended to include zero
ggplot(
  data = by_year_continent_Gdp,
  mapping = aes(x = year, y = medianGdpPercap, color = continent)
) +
  geom_line() +
  expand_limits(y = 0)
# Creating Barplots
# Median GDP per capita for each continent in 1957
by_continent_1957 <- gapminder %>%
  filter(year == 1957) %>%
  group_by(continent) %>%
  summarise(medianGdpPercap = median(gdp_cap))
by_continent_1957
## # A tibble: 5 × 2
## continent medianGdpPercap
## <chr> <dbl>
## 1 Africa 1024.
## 2 Americas 3781.
## 3 Asia 1548.
## 4 Europe 6067.
## 5 Oceania 11599.
# Bar chart of the by_continent_1957 data: one bar per continent, bar height
# given by the median GDP per capita
ggplot(
  data = by_continent_1957,
  mapping = aes(x = continent, y = medianGdpPercap)
) +
  geom_col()
# Creating Histogram
# Keep the 1957 rows and add pop_by_mil, the population expressed in millions
gapminder_1957_pop <- gapminder %>%
  filter(year == 1957) %>%
  mutate(pop_by_mil = population / 1e6)
head(gapminder_1957_pop, n = 10)
## country year population continent life_exp gdp_cap pop_by_mil
## 1 Afghanistan 1957 9240934 Asia 30.332 820.8530 9.240934
## 2 Albania 1957 1476505 Europe 59.280 1942.2842 1.476505
## 3 Algeria 1957 10270856 Africa 45.685 3013.9760 10.270856
## 4 Angola 1957 4561361 Africa 31.999 3827.9405 4.561361
## 5 Argentina 1957 19610538 Americas 64.399 6856.8562 19.610538
## 6 Australia 1957 9712569 Oceania 70.330 10949.6496 9.712569
## 7 Austria 1957 6965860 Europe 67.480 8842.5980 6.965860
## 8 Bahrain 1957 138655 Asia 53.832 11635.7995 0.138655
## 9 Bangladesh 1957 51365468 Asia 39.348 661.6375 51.365468
## 10 Belgium 1957 8989111 Europe 69.240 9714.9606 8.989111
# Histogram of pop_by_mil from gapminder_1957_pop, using 50 bins and a log10
# x-axis
ggplot(data = gapminder_1957_pop, mapping = aes(x = pop_by_mil)) +
  geom_histogram(bins = 50) +
  scale_x_log10()
# Create gapminder_1962 by keeping only the rows where year is 1962
gapminder_1962 <- filter(gapminder, year == 1962)
head(gapminder_1962, n = 10)
## country year population continent life_exp gdp_cap
## 1 Afghanistan 1962 10267083 Asia 31.997 853.1007
## 2 Albania 1962 1728137 Europe 64.820 2312.8890
## 3 Algeria 1962 11000948 Africa 48.303 2550.8169
## 4 Angola 1962 4826015 Africa 34.000 4269.2767
## 5 Argentina 1962 21283783 Americas 65.142 7133.1660
## 6 Australia 1962 10794968 Oceania 70.930 12217.2269
## 7 Austria 1962 7129864 Europe 69.540 10750.7211
## 8 Bahrain 1962 171863 Asia 56.923 12753.2751
## 9 Bangladesh 1962 56839289 Asia 41.216 686.3416
## 10 Belgium 1962 9218400 Europe 70.250 10991.2068
# Box plot of the gapminder_1962 data: GDP per capita (log10 y-axis) for each
# continent
ggplot(data = gapminder_1962, mapping = aes(x = continent, y = gdp_cap)) +
  geom_boxplot() +
  scale_y_log10()