My working directory is already set but I like to manually set it each time to make sure there are no errors.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.1
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
# Pin the working directory explicitly, then read the nations CSV relative to it.
setwd(dir = "C:/Users/andre/OneDrive/Documents/School/Data 110")
nations <- read_csv(file = "nations.csv")
## Rows: 5275 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): iso2c, iso3c, country, region, income
## dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows of the freshly loaded data.
head(nations, n = 6)
## # A tibble: 6 × 10
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2
## 3 AD AND Andorra 2003 NA 74783 10.3 2
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7
## 6 AD AND Andorra 2011 NA 82326 NA 1.6
## # ℹ 2 more variables: region <chr>, income <chr>
Added the variable “gdp” to the data set.
# Add `gdp`: per-capita GDP times population, scaled down by 1e12.
nations <- mutate(nations, gdp = gdp_percap * population / 1e12)
# Preview the first six rows to confirm the new column is present.
head(nations, n = 6)
## # A tibble: 6 × 11
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2
## 3 AD AND Andorra 2003 NA 74783 10.3 2
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7
## 6 AD AND Andorra 2011 NA 82326 NA 1.6
## # ℹ 3 more variables: region <chr>, income <chr>, gdp <dbl>
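Looks good. Next I create two new data sets: “nations1”, which keeps only the rows for the United States, China, Japan, and Germany, and “nations2”, which sums “gdp” by region and year while ignoring NA values.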
# nations1: keep only the rows for the four countries of interest.
nations1 <- nations %>%
  filter(country %in% c("United States", "China", "Japan", "Germany"))
# nations2: total `gdp` per region and year, ignoring NA values.
# `.groups` is left unset, so the result remains grouped by `region`.
nations2 <- summarise(
  group_by(nations, region, year),
  GDP = sum(gdp, na.rm = TRUE)
)
## `summarise()` has grouped output by 'region'. You can override using the
## `.groups` argument.
# Preview the first six rows of the four-country subset.
head(nations1, n = 6)
## # A tibble: 6 × 11
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 CN CHN China 1992 1260. 1164970000 18.3 29.4
## 2 CN CHN China 2005 5053. 1303720000 12.4 14
## 3 CN CHN China 2000 2915. 1262645000 14.0 21.2
## 4 CN CHN China 1991 1091. 1150780000 19.7 29.7
## 5 CN CHN China 2013 12219. 1357380000 12.1 6.3
## 6 CN CHN China 1999 2650. 1252735000 14.6 22.2
## # ℹ 3 more variables: region <chr>, income <chr>, gdp <dbl>
Looks good.
# Preview the first six rows of the region-by-year GDP totals.
head(nations2, n = 6)
## # A tibble: 6 × 3
## # Groups: region [1]
## region year GDP
## <chr> <dbl> <dbl>
## 1 East Asia & Pacific 1990 5.52
## 2 East Asia & Pacific 1991 6.03
## 3 East Asia & Pacific 1992 6.50
## 4 East Asia & Pacific 1993 7.04
## 5 East Asia & Pacific 1994 7.64
## 6 East Asia & Pacific 1995 8.29
Using “nations1” I create the first plot.
# Point-and-line chart of `gdp` by `year`, one colored series per country.
p1 <- ggplot(
  data = nations1,
  mapping = aes(x = year, y = gdp, color = country)
) +
  geom_point() +
  geom_line() +
  labs(
    title = "China's Rise to Become the Largest Economy",
    x = "Year",
    y = "GDP (trillions of dollars)"
  ) +
  scale_color_brewer(palette = "Set1") +
  guides(color = guide_legend(title = "Country"))
# Render the static plot.
p1
I was able to make it interactive with ease, but I cannot get it to display “population”. I tried mutating the variable to see if the length of the numbers was the issue, but that didn’t change anything.
# Convert the static plot to an interactive plotly widget, restricting the
# hover tooltip to the "country" and "gdp" entries.
p1_active <- p1 %>%
  ggplotly(tooltip = c("country", "gdp"))
# Render the interactive plot.
p1_active
Using “nations2” I create the second plot, making sure to use “fill” in the aes function and “color” in the geom_area function.
# Area chart of `GDP` by `year`, filled by region, with white outlines
# separating the areas.
p2 <- ggplot(
  data = nations2,
  mapping = aes(x = year, y = GDP, fill = region)
) +
  geom_area(color = "white") +
  labs(
    title = "GDP by World Bank Region",
    x = "Year",
    y = "GDP (trillions of dollars)"
  ) +
  scale_fill_brewer(palette = "Set2") +
  guides(fill = guide_legend(title = "Region"))
# Render the static plot.
p2
I am having a hard time getting the data to display when you hover anywhere over a region, rather than only when the cursor is on the line.
# Convert the area chart to an interactive plotly widget, restricting the
# hover tooltip to the "y", "x", and "region" entries.
p2_active <- p2 %>%
  ggplotly(tooltip = c("y", "x", "region"))
# Render the interactive plot.
p2_active