Nations Dataset

Author

Olivia Yuengling

Loading the Nations Dataset

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
# Read the nations CSV into a tibble named `nations`.
# NOTE(review): this absolute path only resolves on the author's machine;
# consider a project-relative path so the document renders elsewhere.
nations <- readr::read_csv(
  file = "C:/Users/omyue/OneDrive/Desktop/Montgomery College/Spring 24/Data 101/datasets/nations.csv"
)
Rows: 5275 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): iso2c, iso3c, country, region, income
dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Graph No. 1: Line-Dot Graph

Cleaning and Preparing the Data

# Build the table used by the first chart: country, year, and total GDP
# expressed in billions (GDP per capita times population, divided by 1e9).
# The total is written back into `gdp_percap`, so after this step that column
# no longer holds a per-capita figure.
new_nations <- nations |>
  mutate(gdp_percap = gdp_percap * population / 1e9) |>
  select(country, year, gdp_percap) # keep only the columns the chart needs

# One data frame per country of interest. The author picked these four as the
# most densely populated nations; this script does not check that itself.
chart_1df <- dplyr::filter(new_nations, country == "Hong Kong SAR, China")
chart_2df <- dplyr::filter(new_nations, country == "Bangladesh")
chart_3df <- dplyr::filter(new_nations, country == "Macao SAR, China")
chart_4df <- dplyr::filter(new_nations, country == "Singapore")

Merging the Datasets

# Stack the four per-country data frames into a single table for plotting.
# bind_rows() matches columns by name and always returns a tibble, which is
# safer than base rbind() if the inputs ever differ in column order.
merge_1 <- bind_rows(chart_1df, chart_2df, chart_3df, chart_4df)

Plotting the Graph!

# Line-and-point chart of GDP (in billions) over time, one colored series per
# country in `merge_1`.
# NOTE(review): the plotted values are per-year GDP totals stored in the
# `gdp_percap` column, not averages -- confirm the "Average GDP" title wording.
chart_1 <- ggplot(merge_1, aes(x = year, y = gdp_percap, color = country)) +
  geom_line() + # connects each country's yearly values
  geom_point() + # marks each country's value for a given year
  scale_color_brewer(palette = "Set1") + # sets color palette
  labs(
    title = "Average GDP by Year for the Top 4 Population Dense Nations",
    x = "Year",
    # GDP stands for Gross Domestic Product (the label previously said "Pay")
    y = "Gross Domestic Product (GDP) (billions)"
  ) +
  theme_minimal(base_size = 10)
chart_1

Graph No. 2: Area Graph

Cleaning and Preparing the Data

# Total GDP (in trillions) per region and year, used by the area chart.
new_nations2 <- nations |>
  # per-country GDP in trillions: GDP per capita times population, over 10^12
  mutate(gdp = (gdp_percap * population) / 10^12) |>
  group_by(region, year) |> # one summary row per region-year pair
  summarize(
    # na.rm = TRUE skips missing values inside the sum; it does not drop rows
    gdp = sum(gdp, na.rm = TRUE),
    # return an ungrouped tibble so later steps do not inherit the `region`
    # grouping (this also silences the summarise() grouping message)
    .groups = "drop"
  )
`summarise()` has grouped output by 'region'. You can override using the
`.groups` argument.

Plotting the Graph!

# Area chart of GDP (trillions) by year, filled by region.
chart2 <- ggplot(
  data = new_nations2,
  mapping = aes(x = year, y = gdp, fill = region)
) +
  geom_area() + # one filled band per region
  scale_fill_brewer(palette = "Set2") + # ColorBrewer "Set2" fill colors
  labs(
    title = "GDP by World Bank Region",
    x = "Year",
    y = "Gross Domestic Product (trillions)"
  )
chart2 # display the finished chart