library(shiny)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(readxl)
library(leaflet)
library(mapview)
## The legacy packages maptools, rgdal, and rgeos, underpinning this package
## will retire shortly. Please refer to R-spatial evolution reports on
## https://r-spatial.org/r/2023/05/15/evolution4.html for details.
## This package is now running under evolution status 0
library(maps)
# Read the Gapminder CSV (comma-separated) from the working directory.
path <- "gapminder.csv"
df <- read.csv(file = path, sep = ",")
First, let's check the data type of each column.
# Print a compact summary of df: dimensions plus the type and first values of
# every column.
str(df)
## 'data.frame': 10545 obs. of 10 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ country : chr "Albania" "Algeria" "Angola" "Antigua and Barbuda" ...
## $ year : int 1960 1960 1960 1960 1960 1960 1960 1960 1960 1960 ...
## $ infant_mortality: num 115.4 148.2 208 NA 59.9 ...
## $ life_expectancy : num 62.9 47.5 36 63 65.4 ...
## $ fertility : num 6.19 7.65 7.32 4.43 3.11 4.55 4.82 3.45 2.7 5.57 ...
## $ population : int 1636054 11124892 5270844 54681 20619075 1867396 54208 10292328 7065525 3897889 ...
## $ gdp : num NA 1.38e+10 NA NA 1.08e+11 ...
## $ continent : chr "Europe" "Africa" "Africa" "Americas" ...
## $ region : chr "Southern Europe" "Northern Africa" "Middle Africa" "Caribbean" ...
# Preview the first six rows of the raw data.
print(head(df, n = 6L))
## X country year infant_mortality life_expectancy fertility
## 1 1 Albania 1960 115.40 62.87 6.19
## 2 2 Algeria 1960 148.20 47.50 7.65
## 3 3 Angola 1960 208.00 35.98 7.32
## 4 4 Antigua and Barbuda 1960 NA 62.97 4.43
## 5 5 Argentina 1960 59.87 65.39 3.11
## 6 6 Armenia 1960 NA 66.86 4.55
## population gdp continent region
## 1 1636054 NA Europe Southern Europe
## 2 11124892 13828152297 Africa Northern Africa
## 3 5270844 NA Africa Middle Africa
## 4 54681 NA Americas Caribbean
## 5 20619075 108322326649 Americas South America
## 6 1867396 NA Asia Western Asia
After checking the type of each column, we convert the categorical text columns (country, continent and region) to factors.
# Convert the three categorical text columns to factors in one pass.
df[c("country", "continent", "region")] <- lapply(
  df[c("country", "continent", "region")],
  as.factor
)
Let's count how many missing values each column contains.
# Count the missing values in each column (named numeric vector, one entry per
# column) and print the result.
na_counts <- vapply(
  df,
  function(column) sum(is.na(column)),
  numeric(1)
)
print(na_counts)
## X country year infant_mortality
## 0 0 0 1453
## life_expectancy fertility population gdp
## 0 187 185 2972
## continent region
## 0 0
Drop every row that contains a missing value (all of the missing values are in numeric columns), leaving only complete records.
# Keep only complete rows: na.omit() removes every row that has an NA in any
# column and records the removed row indices in the "na.action" attribute.
# NOTE: rows are dropped here, not zero-filled.
df_cleaned <- na.omit(df)
# Inspect the cleaned data to confirm the remaining row count and column types.
str(df_cleaned)
## 'data.frame': 7139 obs. of 10 variables:
## $ X : int 2 5 8 9 11 13 14 16 18 20 ...
## $ country : Factor w/ 185 levels "Albania","Algeria",..: 2 5 8 9 11 13 14 16 18 20 ...
## $ year : int 1960 1960 1960 1960 1960 1960 1960 1960 1960 1960 ...
## $ infant_mortality: num 148.2 59.9 20.3 37.3 51 ...
## $ life_expectancy : num 47.5 65.4 70.9 68.8 62 ...
## $ fertility : num 7.65 3.11 3.45 2.7 4.5 6.73 4.33 2.6 6.28 6.7 ...
## $ population : int 11124892 20619075 10292328 7065525 109526 48200702 230934 9140563 2431620 3693451 ...
## $ gdp : num 1.38e+10 1.08e+11 9.67e+10 5.24e+10 1.31e+09 ...
## $ continent : Factor w/ 5 levels "Africa","Americas",..: 1 2 5 4 2 3 2 4 1 2 ...
## $ region : Factor w/ 22 levels "Australia and New Zealand",..: 11 16 1 22 2 18 2 22 20 16 ...
## - attr(*, "na.action")= 'omit' Named int [1:3406] 1 3 4 6 7 10 12 15 17 19 ...
## ..- attr(*, "names")= chr [1:3406] "1" "3" "4" "6" ...
Shiny
# Define UI
# UI definition: a two-tab layout, followed by a plain-text user guide and a
# link to the data source. Reads the global `df_cleaned` to populate the
# country, year and continent dropdowns.
ui <- fluidPage(
  # Browser window title.
  title = "Visualization of Demographic Data from OECD & OPEC countries",
  tabsetPanel(
    # Tab 1: one demographic measure over time for a single country
    # (rendered server-side as `scatterplot1`).
    tabPanel(
      "Tab One: Demographic Data changes through years in each individual country",
      sidebarLayout(
        sidebarPanel(
          selectInput(
            "Measure_1", "Select Demographic Measure:",
            choices = c(
              "infant_mortality", "life_expectancy", "fertility",
              "population", "gdp"
            )
          ),
          # `country` is a factor; pass a sorted character vector so the
          # dropdown is alphabetical and does not depend on how the widget
          # coerces factors.
          selectInput(
            "Country_1", "Select targeted Country:",
            choices = sort(unique(as.character(df_cleaned$country)))
          )
        ),
        mainPanel(
          plotOutput("scatterplot1")
        )
      )
    ),
    # Tab 2: interactive scatter of two measures across the countries of one
    # continent in one year (rendered server-side as `scatterplot2`).
    tabPanel(
      "Tab Two: Distribution of Demographic Data across countries within each Geographical Continent in specific year",
      sidebarLayout(
        sidebarPanel(
          selectInput(
            "Year_2", "Select Year",
            choices = sort(unique(df_cleaned$year))
          ),
          selectInput(
            "Continent_2", "Select Continent",
            choices = sort(unique(as.character(df_cleaned$continent)))
          ),
          selectInput(
            "X_2", "Select X Variable",
            choices = c("infant_mortality", "life_expectancy", "fertility", "gdp")
          ),
          # Default Y to a different measure than X so the initial plot is not
          # a variable plotted against itself.
          # No trailing comma after this last input: an empty argument is an
          # error wherever the tag builder does not explicitly tolerate it.
          selectInput(
            "Y_2", "Select Y Variable",
            choices = c("infant_mortality", "life_expectancy", "fertility", "gdp"),
            selected = "life_expectancy"
          )
        ),
        mainPanel(
          plotlyOutput("scatterplot2")
        )
      )
    )
  ),
  # Plain text User Guide
  textOutput("plainText"),
  # Clickable URL for Data Source
  uiOutput("urlOutput")
)
# Define server
# Server logic for the app.
#
# Reads the global `df_cleaned` and the inputs declared in `ui`, and fills four
# outputs: `scatterplot1` (ggplot), `scatterplot2` (plotly), `plainText`
# (user guide) and `urlOutput` (data-source link).
#
# @param input  Shiny input object (Measure_1, Country_1, Year_2, Continent_2,
#   X_2, Y_2).
# @param output Shiny output object.
server <- function(input, output) {
  # Title/axis styling shared by both plots.
  emphasis_theme <- theme(
    plot.title = element_text(size = 16, color = "blue"),
    axis.text = element_text(size = 12, face = "bold"),
    axis.title = element_text(size = 15, face = "bold")
  )

  # Page 1: Scatter Plot 1
  output$scatterplot1 <- renderPlot({
    df_cutted <- subset(df_cleaned, country == input$Country_1)
    # `.data[[name]]` looks the selected column up by its string name inside
    # the plot data instead of embedding an external vector in the mapping.
    ggplot(df_cutted, aes(x = year, y = .data[[input$Measure_1]])) +
      geom_point() +
      geom_line() +
      labs(
        x = "Year", y = input$Measure_1,
        title = paste(
          "Change timeline for", input$Measure_1,
          "changes through out years in", input$Country_1
        )
      ) +
      emphasis_theme
  })

  # Page 2: Scatter Plotly 2
  output$scatterplot2 <- renderPlotly({
    filtered_df <- subset(
      df_cleaned,
      year == as.integer(input$Year_2) & continent == input$Continent_2
    )
    # Some year/continent combinations may have no complete rows; show a
    # message instead of attempting to build a plot from an empty data frame.
    validate(need(
      nrow(filtered_df) > 0,
      "No complete records for the selected year and continent."
    ))

    # `text` is not a ggplot2 aesthetic; it is carried through so that
    # ggplotly() can use it as the hover tooltip.
    p <- ggplot(
      filtered_df,
      aes(x = .data[[input$X_2]], y = .data[[input$Y_2]], text = country)
    ) +
      geom_point(aes(size = population, color = country)) +
      scale_size_continuous(range = c(1, 20)) +
      # One randomly drawn named colour per country present in the subset;
      # the palette therefore changes on every re-render.
      scale_color_manual(
        values = sample(colors(), length(unique(filtered_df$country)))
      ) +
      labs(
        x = input$X_2, y = input$Y_2,
        title = paste(
          "Country Demographic Data of",
          input$Continent_2, "in", input$Year_2
        )
      ) +
      # The complete theme must come first: adding theme_bw() after theme()
      # replaces every element and silently discards the custom styling.
      theme_bw() +
      emphasis_theme

    ggplotly(p, tooltip = "text") %>%
      layout(hovermode = "closest")
  })

  # Render plain text of User Guide
  output$plainText <- renderText({
    "User Guide: This is a simple visualization of demographic data from OECD & OPEC countries from 1960 to 2011. It includes two Tabs. In Tab One, you can select a specific country and view the change timeline of one certain demographic measure as you wish. In Tab Two, you can select a continent and view the distribution of demographic data in each country within the chosen continent in a specific year (Notice: the size of a dot represents the relative population size of a country)."
  })

  # Render clickable URL of Data Source
  output$urlOutput <- renderUI({
    url <- "https://www.kaggle.com/datasets/utkarshx27/health-and-income-outcomes"
    tags$a(href = url, "Click here for the Data Source: Health & Income outcomes of OECD & OPEC countries")
  })
}
# Build the Shiny app object from the UI and server defined above; printing it
# at top level launches the app.
shinyApp(ui, server)
## PhantomJS not found. You can install it with webshot::install_phantomjs(). If it is installed, please make sure the phantomjs executable can be found via the PATH variable.
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.