# Chart 1: GDP over time for the five BRICS economies ----

# Attach the packages this script relies on: dplyr supplies %>%, mutate() and
# filter(); ggplot2 supplies the plotting functions.
library(dplyr)
library(ggplot2)

# NOTE(review): setwd() with an absolute, machine-specific path makes the
# script non-portable. It is kept here so "nations.csv" still resolves as
# before; consider an RStudio project or a relative path instead.
setwd("C:/Users/12403/Desktop/MC 2020/MC Fall 2020/DATA110/")

# Read the csv file into a data frame.
df0 <- read.csv("nations.csv")

# Mutate step 1: per-capita GDP times population.
df1 <- df0 %>%
  mutate(gdp_pop = gdp_percap * population)

# Mutate step 2: rescale by 1e12 so the values are in trillions, matching the
# "GDP ($ trillion)" axis label. The previous divisor (1e9) produced billions.
df2 <- df1 %>%
  mutate(gdp = gdp_pop / 1e12)

# Keep only the five countries shown in chart 1.
df3 <- df2 %>%
  filter(
    country %in% c(
      "Brazil", "Russian Federation", "India", "China", "South Africa"
    )
  )

# Base plot: year on x, GDP on y, one colour per country, plus all labels.
p1 <- ggplot(df3, aes(x = year, y = gdp, colour = country)) +
  labs(
    x = "Year",
    y = "GDP ($ trillion)",
    title = "China's Rise to Become the Largest Economy amongst BRICS",
    colour = "Country"
  )

# Add the point layer and the colour palette.
p2 <- p1 +
  geom_point() +
  scale_color_brewer(palette = "Set1")

# Add the line layer.
p3 <- p2 + geom_line()

# Display "China's Rise to Become the Largest Economy amongst BRICS".
p3

# Chart 2: GDP summed per region and year ----

# Aggregate df2 (every country) rather than df3: df3 is df2 filtered down to
# the five countries of chart 1, so summing it would show those countries'
# GDP under a "by region" title instead of regional totals.
# na.rm = TRUE skips rows with missing GDP; .groups = "drop" returns an
# ungrouped result and silences the summarise() regrouping message.
df4 <- df2 %>%
  group_by(region, year) %>%
  summarise(GDP = sum(gdp, na.rm = TRUE), .groups = "drop")

# Base plot for the second chart, with axis labels and title.
# NOTE(review): the y-axis label assumes `gdp` is expressed in trillions of
# dollars -- confirm against the step where `gdp` is computed.
p4 <- ggplot(df4, aes(x = year, y = GDP)) +
  labs(
    x = "Year",
    y = "GDP ($ trillion)",
    title = "GDP by World Bank Region"
  )

# Add the area layer filled by region, the fill palette and the legend title.
p5 <- p4 +
  geom_area(aes(fill = region)) +
  scale_fill_brewer(palette = "Set2") +
  labs(fill = "Region")

# Display "GDP by World Bank Region".
p5