This is a report about using the R Shiny package to build a data visualization of sociological indicators (fertility rate, life expectancy, GDP, etc.) for OECD & OPEC countries.

Install all the required libraries and import them.

library(shiny)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(readxl)
library(leaflet)
library(mapview)
## The legacy packages maptools, rgdal, and rgeos, underpinning this package
## will retire shortly. Please refer to R-spatial evolution reports on
## https://r-spatial.org/r/2023/05/15/evolution4.html for details.
## This package is now running under evolution status 0
library(maps)

Load and pre-process the data

# Location of the raw data set, relative to the working directory.
path <- "gapminder.csv"
# Read the comma-separated file (with a header row) into a data frame.
df <- read.csv(file = path, header = TRUE, sep = ",")

First, let’s check the data type of each column.

# Compact overview of every column: name, type and first few values.
utils::str(object = df)
## 'data.frame':    10545 obs. of  10 variables:
##  $ X               : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ country         : chr  "Albania" "Algeria" "Angola" "Antigua and Barbuda" ...
##  $ year            : int  1960 1960 1960 1960 1960 1960 1960 1960 1960 1960 ...
##  $ infant_mortality: num  115.4 148.2 208 NA 59.9 ...
##  $ life_expectancy : num  62.9 47.5 36 63 65.4 ...
##  $ fertility       : num  6.19 7.65 7.32 4.43 3.11 4.55 4.82 3.45 2.7 5.57 ...
##  $ population      : int  1636054 11124892 5270844 54681 20619075 1867396 54208 10292328 7065525 3897889 ...
##  $ gdp             : num  NA 1.38e+10 NA NA 1.08e+11 ...
##  $ continent       : chr  "Europe" "Africa" "Africa" "Americas" ...
##  $ region          : chr  "Southern Europe" "Northern Africa" "Middle Africa" "Caribbean" ...
# Show the first six rows of the raw data.
print(head(df, n = 6L))
##   X             country year infant_mortality life_expectancy fertility
## 1 1             Albania 1960           115.40           62.87      6.19
## 2 2             Algeria 1960           148.20           47.50      7.65
## 3 3              Angola 1960           208.00           35.98      7.32
## 4 4 Antigua and Barbuda 1960               NA           62.97      4.43
## 5 5           Argentina 1960            59.87           65.39      3.11
## 6 6             Armenia 1960               NA           66.86      4.55
##   population          gdp continent          region
## 1    1636054           NA    Europe Southern Europe
## 2   11124892  13828152297    Africa Northern Africa
## 3    5270844           NA    Africa   Middle Africa
## 4      54681           NA  Americas       Caribbean
## 5   20619075 108322326649  Americas   South America
## 6    1867396           NA      Asia    Western Asia

After checking the type of each column, we convert the categorical columns (country, continent and region) from character to factor.

# Convert the three categorical text columns to factors in a single step.
df[c("country", "continent", "region")] <- lapply(
  df[c("country", "continent", "region")],
  as.factor
)

Let's count how many missing values each column of the data frame has.

# Number of missing entries per column, as a named numeric vector.
na_counts <- vapply(
  df,
  function(column) sum(is.na(column)),
  numeric(1)
)
print(na_counts)
##                X          country             year infant_mortality 
##                0                0                0             1453 
##  life_expectancy        fertility       population              gdp 
##                0              187              185             2972 
##        continent           region 
##                0                0

Remove every row that contains at least one missing value (all missing values are in numerical columns); this drops 3406 of the 10545 rows.

# Keep only complete rows; na.omit() records the dropped row indices in the
# "na.action" attribute of the result.
df_cleaned <- stats::na.omit(object = df)
# Inspect the structure of the cleaned data.
utils::str(object = df_cleaned)
## 'data.frame':    7139 obs. of  10 variables:
##  $ X               : int  2 5 8 9 11 13 14 16 18 20 ...
##  $ country         : Factor w/ 185 levels "Albania","Algeria",..: 2 5 8 9 11 13 14 16 18 20 ...
##  $ year            : int  1960 1960 1960 1960 1960 1960 1960 1960 1960 1960 ...
##  $ infant_mortality: num  148.2 59.9 20.3 37.3 51 ...
##  $ life_expectancy : num  47.5 65.4 70.9 68.8 62 ...
##  $ fertility       : num  7.65 3.11 3.45 2.7 4.5 6.73 4.33 2.6 6.28 6.7 ...
##  $ population      : int  11124892 20619075 10292328 7065525 109526 48200702 230934 9140563 2431620 3693451 ...
##  $ gdp             : num  1.38e+10 1.08e+11 9.67e+10 5.24e+10 1.31e+09 ...
##  $ continent       : Factor w/ 5 levels "Africa","Americas",..: 1 2 5 4 2 3 2 4 1 2 ...
##  $ region          : Factor w/ 22 levels "Australia and New Zealand",..: 11 16 1 22 2 18 2 22 20 16 ...
##  - attr(*, "na.action")= 'omit' Named int [1:3406] 1 3 4 6 7 10 12 15 17 19 ...
##   ..- attr(*, "names")= chr [1:3406] "1" "3" "4" "6" ...

Let's build the data visualization using Shiny.

# Define UI
#
# Page layout with two tabs followed by a user guide and a data-source link:
#   * Tab One - one demographic measure over the years for a single country
#               (static plot, output id "scatterplot1").
#   * Tab Two - two measures plotted against each other for the countries of
#               one continent in one year (plotly, output id "scatterplot2").
# The input ids (Measure_1, Country_1, Year_2, Continent_2, X_2, Y_2) and the
# output ids (scatterplot1, scatterplot2, plainText, urlOutput) are what the
# server function reads and fills. Choices are taken from `df_cleaned`.
ui <- fluidPage(
  title = "Visualization of Demographic Data from OECD & OPEC countries",

  tabsetPanel(
    # Page 1: Scatter Plot 1
    tabPanel(
      "Tab One: Demographic Data changes through years in each individual country",
      sidebarLayout(
        sidebarPanel(
          selectInput(
            "Measure_1", "Select Demographic Measure:",
            choices = c("infant_mortality", "life_expectancy", "fertility",
                        "population", "gdp")
          ),
          selectInput(
            "Country_1", "Select targeted Country:",
            choices = unique(df_cleaned$country)
          )
        ),
        mainPanel(
          plotOutput("scatterplot1")
        )
      )
    ),

    # Page 2: Scatter Plotly 2
    tabPanel(
      "Tab Two: Distribution of Demographic Data across countries within each Geographical Continent in specific year",
      sidebarLayout(
        sidebarPanel(
          selectInput("Year_2", "Select  Year",
                      choices = unique(df_cleaned$year)),
          selectInput("Continent_2", "Select Continent",
                      choices = unique(df_cleaned$continent)),
          selectInput(
            "X_2", "Select X Variable",
            choices = c("infant_mortality", "life_expectancy", "fertility",
                        "gdp")
          ),
          # No trailing comma after the last input: an empty trailing argument
          # is only tolerated by some versions of the tag-building functions.
          selectInput(
            "Y_2", "Select Y Variable",
            choices = c("infant_mortality", "life_expectancy", "fertility",
                        "gdp")
          )
        ),
        mainPanel(
          plotlyOutput("scatterplot2")
        )
      )
    )
  ),

  # Plain text User Guide
  textOutput("plainText"),

  # Clickable URL for Data Source
  uiOutput("urlOutput")
)


# Define server
#
# Shiny server function that fills the four outputs declared in `ui`.
#
# @param input  Input list supplied by Shiny; this function reads Measure_1,
#               Country_1, Year_2, Continent_2, X_2 and Y_2.
# @param output Output list supplied by Shiny; scatterplot1, scatterplot2,
#               plainText and urlOutput are assigned here.
#
# All plots are drawn from the global data frame `df_cleaned`.
server <- function(input, output) {
  # Page 1: Scatter Plot 1
  output$scatterplot1 <- renderPlot({
    # Wait until both selectors have a value before drawing.
    req(input$Country_1, input$Measure_1)

    country_df <- subset(df_cleaned, country == input$Country_1)

    # `.data[[name]]` looks the column up by its string name inside the data
    # mask instead of embedding a vector taken from outside the plot data.
    ggplot(country_df, aes(x = year, y = .data[[input$Measure_1]])) +
      geom_point() +
      geom_line() +
      labs(
        x = "Year",
        y = input$Measure_1,
        title = paste("Change timeline for", input$Measure_1,
                      "changes through out years in", input$Country_1)
      ) +
      theme(
        plot.title = element_text(size = 16, color = "blue"),
        axis.text = element_text(size = 12, face = "bold"),
        axis.title = element_text(size = 15, face = "bold")
      )
  })

  # Page 2: Scatter Plotly 2
  output$scatterplot2 <- renderPlotly({
    req(input$Year_2, input$Continent_2, input$X_2, input$Y_2)

    filtered_df <- subset(
      df_cleaned,
      year == as.integer(input$Year_2) & continent == input$Continent_2
    )

    # Show a readable message instead of an error when the chosen
    # year/continent combination has no complete rows.
    validate(
      need(nrow(filtered_df) > 0,
           "No data available for the selected year and continent.")
    )

    n_countries <- length(unique(filtered_df$country))

    p <- ggplot(
      filtered_df,
      aes(x = .data[[input$X_2]], y = .data[[input$Y_2]], text = country)
    ) +
      geom_point(aes(size = population, color = country)) +
      scale_size_continuous(range = c(1, 20)) +
      # One randomly drawn named colour per country; re-drawn on every render.
      scale_color_manual(values = sample(colors(), n_countries)) +
      labs(
        x = input$X_2,
        y = input$Y_2,
        title = paste("Country Demographic Data of",
                      input$Continent_2, "in", input$Year_2)
      ) +
      # The complete theme must come first: adding theme_bw() after theme()
      # would replace the custom title/axis settings below.
      theme_bw() +
      theme(
        plot.title = element_text(size = 16, color = "blue"),
        axis.text = element_text(size = 12, face = "bold"),
        axis.title = element_text(size = 15, face = "bold")
      )

    # Only the `text` aesthetic (the country name) is shown on hover.
    ggplotly(p, tooltip = "text") %>%
      layout(hovermode = "closest")
  })

  # Render plain text of User Guide
  output$plainText <- renderText({
    "User Guide: This is a simple visualization of demographic data from OECD & OPEC countries from 1960 to 2011. It includes two Tabs. In Tab One, you can select a specific country and view the change timeline of one certain demographic measure as you wish. In Tab Two, you can select a continent and view the distribution of demographic data in each country within the chosen continent in a specific year (Notice: the size of a dot representing the relative population size of a country)."
  })

  # Render clickable URL of Data Source
  output$urlOutput <- renderUI({
    url <- "https://www.kaggle.com/datasets/utkarshx27/health-and-income-outcomes"
    tags$a(
      href = url,
      "Click here for the Data Source: Health & Income outcomes of OECD & OPEC countries"
    )
  })
}

# Build the Shiny app object from the UI definition and the server function;
# printing it at top level launches the app.
shiny::shinyApp(ui, server)
## PhantomJS not found. You can install it with webshot::install_phantomjs(). If it is installed, please make sure the phantomjs executable can be found via the PATH variable.
Shiny applications not supported in static R Markdown documents

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.