#install.packages("treemap")
#install.packages("RColorBrewer")
library(treemap)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(RColorBrewer)
library(ggplot2)
Read the data
# Load the nations data set into a tibble.
# The file is addressed by its full path instead of calling setwd(), so the
# session's working directory is not changed as a side effect of reading one
# file.
data_dir <- "C:/Data 110.MC"
nations <- read_csv(file.path(data_dir, "nations.hw6.csv"))
## Rows: 5275 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): iso2c, iso3c, country, region, income
## dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Peek at the first rows to check that the import parsed as expected.
nations %>% head()
## # A tibble: 6 × 10
## iso2c iso3c country year gdp_percap population birth_…¹ neona…² region income
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8 Europ… High …
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2 Europ… High …
## 3 AD AND Andorra 2003 NA 74783 10.3 2 Europ… High …
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3 Europ… High …
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7 Europ… High …
## 6 AD AND Andorra 2011 NA 82326 NA 1.6 Europ… High …
## # … with abbreviated variable names ¹birth_rate, ²neonat_mortal_rate
# Keep only the observations that report a per-capita GDP value.
s1 <- filter(nations, !is.na(gdp_percap))

# GDPc is the country's total GDP for that year, expressed in trillions:
# per-capita GDP multiplied by population, then divided by 1e12.
s2 <- mutate(s1, GDPc = gdp_percap * population / 1e+12)

# Preview the result, including the new GDPc column.
head(s2)
## # A tibble: 6 × 11
## iso2c iso3c country year gdp_p…¹ popul…² birth…³ neona…⁴ region income GDPc
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl>
## 1 AE ARE United … 1991 73037. 1913190 24.6 7.9 Middl… High … 0.140
## 2 AE ARE United … 1993 71960. 2127863 22.4 7.3 Middl… High … 0.153
## 3 AE ARE United … 2001 83534. 3217865 15.8 5.5 Middl… High … 0.269
## 4 AE ARE United … 1992 73154. 2019014 23.5 7.6 Middl… High … 0.148
## 5 AE ARE United … 1994 74684. 2238281 21.3 6.9 Middl… High … 0.167
## 6 AE ARE United … 2007 75427. 6010100 12.8 4.7 Middl… High … 0.453
## # … with abbreviated variable names ¹gdp_percap, ²population, ³birth_rate,
## # ⁴neonat_mortal_rate
# Restrict the data to the four countries being compared.
s3 <- filter(
  s2,
  country %in% c("Israel", "Egypt, Arab Rep.", "Turkey", "Germany")
)
# Frame of the four-country GDP chart: axes, title, caption and theme.
# No geom layer is added here, so printing this object draws only the empty
# frame without any data marks.
# The x aesthetic is `year`, so the axis is labelled "Year". GDPc is
# gdp_percap * population / 1e12, i.e. a total in trillions rather than a
# ratio, and the y label states that.
ggplot(s3, aes(x = year, y = GDPc)) +
  labs(
    title = "GDP by four countries",
    caption = "Source: International GDP data",
    x = "Year",
    y = "GDP (trillions)"
  ) +
  theme_minimal(base_size = 12)

# p1: base of the four-country GDP chart, one colour per country.
# It deliberately holds no geom layer; the layers are added when it is drawn.
# The x aesthetic is `year`, so the axis is labelled "Year". GDPc is
# gdp_percap * population / 1e12, i.e. a total in trillions rather than a
# ratio, and the y label states that.
p1 <- ggplot(s3, aes(x = year, y = GDPc, color = country)) +
  labs(
    title = "GDP by four countries",
    caption = "Source: International GDP data",
    x = "Year",
    y = "GDP (trillions)"
  ) +
  theme_minimal(base_size = 12)

# Draw the yearly values as points joined by a line for each country.
p1 +
  geom_point() +
  geom_line()

# p2: the four-country chart recoloured with the ColorBrewer "Set1" palette.
# p1 itself carries no geoms, so the point and line layers are added here too;
# without them p2 would render as an empty frame. The plot is printed so the
# recoloured chart actually appears in the output.
p2 <- p1 +
  geom_point() +
  geom_line() +
  scale_colour_brewer(palette = "Set1")
p2
# s4: total GDP per region and year, in trillions.
# gdp_percap is a per-person figure, so adding it up across countries does not
# give a regional total. Each country's value is multiplied by its population
# first and the sum is scaled by 1e12, matching how GDPc is defined for s2.
# na.rm = TRUE drops rows where GDP or population is missing.
s4 <- nations %>%
  group_by(region, year) %>%
  summarise(GDPc = sum(gdp_percap * population, na.rm = TRUE) / 1e+12)
## `summarise()` has grouped output by 'region'. You can override using the
## `.groups` argument.
# Bias number formatting strongly towards fixed notation so that large values
# are not printed in scientific notation.
options(scipen = 999)

# A1: base of the regional GDP area chart, one fill colour per region.
# The x aesthetic is `year`, so the axis is labelled "Year".
A1 <- ggplot(s4, aes(x = year, y = GDPc, fill = region)) +
  labs(
    title = "GDP by region",
    caption = "Source: International GDP data",
    x = "Year",
    y = " Total GDP"
  ) +
  theme_minimal(base_size = 12)

# Draw the regions as filled areas over time.
A1 +
  geom_area()

# A2: the regional area chart recoloured with the ColorBrewer "Set2" palette.
# A1 itself carries no geom, so the area layer is added here too; without it
# A2 would render as an empty frame. The plot is printed so the recoloured
# chart actually appears in the output.
A2 <- A1 +
  geom_area() +
  scale_fill_brewer(palette = "Set2")
A2
Note: From 1990 to 2015, the most significant GDP growth occurred in
East Asia & Pacific. In contrast, the smallest change in GDP occurred in
Sub-Saharan Africa, and the USA is in the middle range of regional GDP
growth.