library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
# Access the data
# Build the full path explicitly instead of calling setwd(): setwd() changes
# global state for the rest of the session, whereas a single `data_dir`
# variable is the only thing to edit when the script moves to another machine.
data_dir <- "C:/Users/ava/Downloads/data110"
nations <- read_csv(file.path(data_dir, "nations.csv"))
## Rows: 5275 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): iso2c, iso3c, country, region, income
## dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the four countries of interest and discard any row that is
# missing a value needed later (year, country, GDP per capita, population).
selected_countries <- nations |>
  filter(
    country %in% c("China", "Germany", "India", "Japan"),
    !is.na(year),
    !is.na(country),
    !is.na(gdp_percap),
    !is.na(population)
  ) |>
  # New variable: GDP per capita times population, divided by 1e12.
  mutate(gdp_trillions = gdp_percap * population / 1e12)
# One value per country and year for the line chart.
# Summarise total GDP in trillions (`gdp_trillions`) rather than `gdp_percap`,
# so the plotted values match the chart's "GDP (in trillions)" axis label.
# `.groups = "drop"` returns an ungrouped tibble, so summarise() no longer
# prints its "has grouped output" message.
data <- selected_countries |>
  group_by(year, country) |>
  summarise(avg_gdp = mean(gdp_trillions), .groups = "drop")
# Line chart with one coloured line (plus point markers) per country.
p1 <- ggplot(data, aes(year, avg_gdp, colour = country)) +
  geom_line(aes(group = country)) +
  geom_point() +
  # palette chosen for clarity of the chart
  scale_colour_brewer(palette = "Set2") +
  # chart title, subtitle, axis labels and legend title
  labs(
    title = "GDP of Different Countries Over the Years",
    subtitle = "From 1990 to 2015",
    x = "Year",
    y = "GDP (in trillions)",
    color = "Country"
  ) +
  # base theme first, then text-size overrides on top of it
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    plot.subtitle = element_text(size = 12),
    axis.title = element_text(size = 14),
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 10)
  )
p1
# Second data set: total GDP per region and year for the area chart.
# Adding up `gdp_percap` across countries does not give a region's GDP, so
# each country's total GDP (per capita x population, divided by 1e12) is
# computed first and then summed within the region.
# `na.rm = TRUE` skips countries with a missing GDP or population value.
# `.groups = "drop"` returns an ungrouped tibble, so summarise() no longer
# prints its "has grouped output" message.
data2 <- nations |>
  mutate(gdp_trillions = gdp_percap * population / 1e12) |>
  group_by(region, year) |>
  summarise(GDP = sum(gdp_trillions, na.rm = TRUE), .groups = "drop")
# Stacked area chart by region; `fill` (not `colour`) is mapped to region
# because it paints the interior of each area.
p2 <- ggplot(data2, aes(year, GDP, fill = region)) +
  # thin white outlines separate the stacked regions
  geom_area(colour = "white", linewidth = 0.5, alpha = 0.8) +
  # palette and theme
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "GDP of Different Regions Over the Years",
    x = "year",
    y = "GDP",
    fill = "Region"
  ) +
  theme_minimal()
p2
The difference between fill and color when making the chart is that the color argument alters the color of points, lines, and the borders of shapes, which makes it well suited to drawing plot borders or outlines. For example, in a line plot made with geom_line(), the color of the lines is determined by the color aesthetic. The fill argument, on the other hand, fills the interior of shapes, such as the areas under the lines in an area plot (geom_area()) and the bars in a bar plot (geom_bar()). When creating an area chart, the fill argument can be used to distinguish different regions by filling them with different colors, and the color argument can be used to add a border around these filled areas. This border, which is frequently a thin line, helps separate the areas.