Merge multiple data frames; plot with ggplot, plotly, and gganimate.
pacman::p_load(
rio,
here,
tidyverse,
lubridate,
plotly,
gganimate
)
# ---- Load the World Bank workbooks ----
# Every indicator is read from the "Data" sheet of its workbook, skipping the
# first three rows of the sheet.

# GDP, plus the country metadata sheet stored in the same workbook
wb_gdp <- import(
  here("data", "API_NY.GDP.MKTP.CD_DS2_en_excel_v2_5358382.xls"),
  which = "Data",
  skip = 3
)
wb_metadata <- import(
  here("data", "API_NY.GDP.MKTP.CD_DS2_en_excel_v2_5358382.xls"),
  which = "Metadata - Countries"
)

# Population
wb_pop <- import(
  here("data", "API_SP.POP.TOTL_DS2_en_excel_v2_5358476.xls"),
  which = "Data",
  skip = 3
)

# Life expectancy at birth
wb_life <- import(
  here("data", "API_SP.DYN.LE00.IN_DS2_en_excel_v2_5358902.xls"),
  which = "Data",
  skip = 3
)

# GDP per capita
wb_gdpcap <- import(
  here("data", "API_NY.GDP.PCAP.CD_DS2_en_excel_v2_5358450.xls"),
  which = "Data",
  skip = 3
)

# GDP growth (annual %)
wb_gdpgrowth <- import(
  here("data", "API_NY.GDP.MKTP.KD.ZG_DS2_en_excel_v2_5358368.xls"),
  which = "Data",
  skip = 3
)
# Attach the country metadata to the GDP table. merge() keeps only the
# country codes present in both tables (its default is all = FALSE).
dt <- merge(wb_gdp, wb_metadata, by = "Country Code")

# Reshape from wide (one column per year) to long (one row per country-year).
# Year columns are selected by name (four-digit headers) instead of by the
# positions 5:66, so the reshape keeps working if a column is added, removed
# or reordered upstream.
dt <- dt %>%
  pivot_longer(
    cols = matches("^[0-9]{4}$"),
    names_to = "year",
    values_to = "gdp"
  ) %>%
  # Drop the columns that are not used later; any_of() tolerates a column
  # that is already absent, like the original `dt$col <- NULL` assignments.
  select(
    -any_of(c("Indicator Code", "SpecialNotes", "TableName", "Indicator Name"))
  )
Note that we need to convert the variable classes before plotting with ggplot:
# Coerce column types: year becomes an integer, and both country identifiers
# become factors.
dt <- dt %>%
  mutate(
    year = as.integer(year),
    `Country Code` = as.factor(`Country Code`),
    `Country Name` = as.factor(`Country Name`)
  )

# Count the missing values across the whole table.
sum(is.na(dt))
## [1] 9288
# Prepare data: keep Austria only and add GDP expressed in billions of dollars.
dt_aut <- dt[dt$`Country Code` == "AUT", ]
dt_aut <- cbind(dt_aut, "GDP" = dt_aut$gdp / 1e9)

# Plot the rescaled GDP column so that the values match the "Bil dollars"
# axis label (plotting raw `gdp` would show plain dollars under that label).
ggplot(dt_aut, aes(x = year, y = GDP)) +
  geom_line() +
  labs(
    title = "GDP (Austria)",
    caption = "Source: Worldbank Data 2023",
    x = "Year",
    y = "Bil dollars"
  ) +
  theme_classic()
If we want the graph with colours (one line per country):
# Prepare data - dplyr
# Use %in% for set membership. `==` against a length-4 vector recycles the
# vector along the rows, so each row is compared with only one of the four
# names and most matching rows are silently dropped.
dt_multi <- dt %>%
  filter(`Country Name` %in% c("Austria", "France", "Germany", "Switzerland")) %>%
  mutate(GDP = gdp / 1e9) # GDP in billions of dollars

# Prepare data - base R equivalent
# dt_multi <- dt[dt$`Country Name` %in% c("Austria", "France", "Germany", "Switzerland"), ]
# dt_multi <- cbind(dt_multi, "GDP" = dt_multi$gdp / 1e9)

# Basic plot: one coloured line per country
a1 <- ggplot(dt_multi, aes(x = year, y = GDP, colour = `Country Name`)) +
  geom_line()

# Customised plot: titles, classic theme, no y-axis title.
# GDP was divided by 1e9 above, so the unit shown is billions of dollars.
a2 <- ggplot(dt_multi, aes(x = year, y = GDP, colour = `Country Name`)) +
  geom_line() +
  labs(
    title = "GDP",
    subtitle = "In billion dollars",
    caption = "Source: Worldbank data 2023",
    x = "Year" # Rename the title of the x axis
  ) +
  theme_classic() +
  theme(axis.title.y = element_blank()) # Remove the title of the y axis

# Put the two plots on the same page, stacked vertically
gridExtra::grid.arrange(a1, a2, ncol = 1, nrow = 2)
If we don't want a plot with colours and want to customise the legend name:
# Distinguish countries by line type instead of colour.
# dt_multi$GDP is gdp / 1e9, so the subtitle states billions of dollars.
ggplot(dt_multi, aes(year, GDP, linetype = `Country Name`)) +
  geom_line() +
  labs(
    title = "GDP",
    subtitle = "In billion dollars",
    caption = "Source: Worldbank data 2023",
    x = "Year"
  ) +
  scale_linetype_discrete(name = "Country Name") + # Customise the legend title
  theme_classic() +
  theme(axis.title.y = element_blank()) # Remove the title of the y axis
# Extract data: four countries, years 2000-2010.
# GDP is converted to billions of dollars here so that the values agree with
# the "In billion dollars" subtitle used by both charts below.
dt_gdp <- dt %>%
  filter(
    `Country Name` %in% c("Austria", "France", "Germany", "Switzerland"),
    year >= 2000, year <= 2010
  ) %>%
  transmute(
    Year = year,
    `Country Code`,
    Country = `Country Name`,
    GDP = gdp / 1e9
  )

# Stacked bar chart with default colours and theme
b1 <- ggplot(dt_gdp, aes(Year, GDP, fill = Country)) +
  geom_bar(stat = "identity") +
  labs(
    title = "GDP",
    subtitle = "In billion dollars",
    caption = "Source: Worldbank data 2023",
    x = "Year"
  )

# Same chart with a different fill palette and the classic theme
b2 <- ggplot(dt_gdp, aes(Year, GDP, fill = Country)) +
  geom_bar(stat = "identity") +
  labs(
    title = "GDP",
    subtitle = "In billion dollars",
    caption = "Source: Worldbank data 2023",
    x = "Year"
  ) +
  # Change the standard set of colours used to fill the bars
  scale_fill_brewer(palette = "Set1") +
  theme_classic() +
  theme(axis.title.y = element_blank()) # Remove the title of the y axis

# Put the two plots on the same page: first side by side (2 x 2 grid), then
# stacked vertically (1 x 2 grid)
gridExtra::grid.arrange(b1, b2, ncol = 2, nrow = 2)
gridExtra::grid.arrange(b1, b2, ncol = 1, nrow = 2)
Step 1: Prepare data. Transform the data from wide to long.
#' Reshape one World Bank indicator table from wide to long
#'
#' Turns the one-column-per-year layout into one row per country-year and
#' keeps only the join keys plus the indicator value.
#'
#' @param wb_wide Data frame with a `Country Code` column and one column per
#'   year whose name is the four-digit year.
#' @param value_name Name (string) of the output column holding the values.
#' @return A tibble with columns `Country Code`, `year` (integer) and
#'   `value_name`.
wb_to_long <- function(wb_wide, value_name) {
  wb_wide %>%
    pivot_longer(
      # Select year columns by name rather than by the positions 5:66.
      cols = matches("^[0-9]{4}$"),
      names_to = "year",
      values_to = value_name,
      # dt$year is an integer; use the same type here so that the join keys
      # of all tables agree when they are merged.
      names_transform = list(year = as.integer)
    ) %>%
    select(`Country Code`, year, all_of(value_name))
}

# Population
wb_pop <- wb_to_long(wb_pop, "pop")

# Life expectancy at birth
wb_life <- wb_to_long(wb_life, "lifeExp")

# GDP per capita
wb_gdpcap <- wb_to_long(wb_gdpcap, "gdpPercap")

# GDP growth (annual %)
wb_gdpgrowth <- wb_to_long(wb_gdpgrowth, "gdpGrowth")
Add the population, life expectancy, GDP per capita, and GDP growth columns to the GDP data frame:
# Collect the GDP table and the four long-format indicator tables, then fold
# them into one data frame with successive full outer joins (base R). merge()
# joins on the column names the two tables have in common.
df_list <- list(
  dt,
  wb_pop,
  wb_life,
  wb_gdpcap,
  wb_gdpgrowth
)
full_dt <- Reduce(
  function(merged, nxt) merge(merged, nxt, all = TRUE),
  df_list
)

# Preview the first ten rows
head(full_dt, 10)
## Country Code year Country Name Region IncomeGroup gdp
## 1 ABW 1960 Aruba Latin America & Caribbean High income NA
## 2 ABW 1961 Aruba Latin America & Caribbean High income NA
## 3 ABW 1962 Aruba Latin America & Caribbean High income NA
## 4 ABW 1963 Aruba Latin America & Caribbean High income NA
## 5 ABW 1964 Aruba Latin America & Caribbean High income NA
## 6 ABW 1965 Aruba Latin America & Caribbean High income NA
## 7 ABW 1966 Aruba Latin America & Caribbean High income NA
## 8 ABW 1967 Aruba Latin America & Caribbean High income NA
## 9 ABW 1968 Aruba Latin America & Caribbean High income NA
## 10 ABW 1969 Aruba Latin America & Caribbean High income NA
## pop lifeExp gdpPercap gdpGrowth
## 1 54608 64.152 NA NA
## 2 55811 64.537 NA NA
## 3 56682 64.752 NA NA
## 4 57475 65.132 NA NA
## 5 58178 65.294 NA NA
## 6 58782 65.502 NA NA
## 7 59291 66.063 NA NA
## 8 59522 66.439 NA NA
## 9 59471 66.757 NA NA
## 10 59330 67.168 NA NA
Step 2: Plot graph
# Keep only the rows that have a Region value.
full_dt <- filter(full_dt, !is.na(Region))

# Bubble chart on a linear x axis: GDP per capita vs life expectancy, point
# size by population, one colour per country (legend hidden).
p1 <- ggplot(
  full_dt,
  aes(x = gdpPercap, y = lifeExp, size = pop, colour = `Country Name`)
) +
  geom_point(alpha = 0.7, show.legend = FALSE) +
  scale_color_viridis_d() +
  scale_size(range = c(2, 12)) +
  labs(x = "GDP per capita", y = "Life expectancy")
p1

# Same chart with a log10 x axis.
p2 <- p1 + scale_x_log10()
p2

# Stack the two charts on one page
gridExtra::grid.arrange(p1, p2, ncol = 1, nrow = 2)
Step 2: Plot graph
# Restrict the data to the years 2020 onward.
# NOTE(review): this overwrites full_dt, so every later use of full_dt and p2
# in this script only sees these years - confirm that is intended.
full_dt <- filter(full_dt, year >= 2020)

# Bubble chart on a linear x axis: GDP per capita vs life expectancy, point
# size by population, one colour per country (legend hidden).
p1 <- ggplot(
  full_dt,
  aes(x = gdpPercap, y = lifeExp, size = pop, colour = `Country Name`)
) +
  geom_point(alpha = 0.7, show.legend = FALSE) +
  scale_color_viridis_d() +
  scale_size(range = c(2, 12)) +
  labs(x = "GDP per capita", y = "Life expectancy")
p1

# Same chart with a log10 x axis.
p2 <- p1 + scale_x_log10()
p2

# Stack the two charts on one page
gridExtra::grid.arrange(p1, p2, ncol = 1, nrow = 2)
Using animation
# Note: R Markdown cannot display this graph in its dynamic format when the
# document is published on RPubs.
#
# Turn the static plot into an animation with one state per year.
# transition_time() needs a numeric or date/time variable. The base merge()
# that builds full_dt can leave `year` as character when the key types of its
# inputs differ, so it is coerced here (a no-op if it is already an integer).
gif <- p2 +
  transition_time(as.integer(year)) +
  labs(title = "Year: {frame_time}")
gif
## NULL
Using plot_ly()
# Interactive bubble chart: GDP per capita vs life expectancy, marker size by
# population, colour by region, one animation frame per year. Hovering over a
# marker shows the country name. The x axis is switched to a log scale.
fig <- full_dt %>%
  plot_ly(
    x = ~gdpPercap,
    y = ~lifeExp,
    size = ~pop,
    color = ~Region,
    frame = ~year,
    text = ~`Country Name`,
    hoverinfo = "text",
    type = "scatter",
    mode = "markers"
  ) %>%
  layout(xaxis = list(type = "log"))
fig