# Show the directory the session starts in.
getwd()
## [1] "/Users/h0age/Documents/data110/Week 7"
# Switch to the project folder only when it exists on this machine; an
# unconditional setwd() on a hard-coded absolute path stops the script with an
# error anywhere else. When the folder is missing, the current directory is
# kept and "nations.csv" is looked up there instead.
project_dir <- "/Users/h0age/Documents/data110/Week 7"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.1.0 ✓ dplyr 1.0.4
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dbplyr)
##
## Attaching package: 'dbplyr'
## The following objects are masked from 'package:dplyr':
##
## ident, sql
library(RColorBrewer)
# Load the nations data set from the working directory.
# The column types are declared explicitly (the same specification readr
# previously guessed and echoed), so parsing no longer depends on type
# guessing and no column-specification message is printed.
nations <- read_csv(
  "nations.csv",
  col_types = cols(
    iso2c = col_character(),
    iso3c = col_character(),
    country = col_character(),
    year = col_double(),
    gdp_percap = col_double(),
    population = col_double(),
    birth_rate = col_double(),
    neonat_mortal_rate = col_double(),
    region = col_character(),
    income = col_character()
  )
)
Look at the column names of the data.
# List the column names of the nations table.
nations %>% colnames()
## [1] "iso2c" "iso3c" "country"
## [4] "year" "gdp_percap" "population"
## [7] "birth_rate" "neonat_mortal_rate" "region"
## [10] "income"
Make a new variable GDP for each country in trillions of dollars, by multiplying gdp_percap by population and dividing by a trillion.
# Add a GDP column holding each row's total GDP in trillions of dollars:
# per-capita GDP times population, divided by one trillion (1e12).
natl_gdp <- mutate(
  nations,
  GDP = gdp_percap * population / 1e12
)
# Print the result to check the new column.
natl_gdp
## # A tibble: 5,275 x 11
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2
## 3 AD AND Andorra 2003 NA 74783 10.3 2
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7
## 6 AD AND Andorra 2011 NA 82326 NA 1.6
## 7 AD AND Andorra 2004 NA 78337 10.9 2
## 8 AD AND Andorra 2010 NA 84419 9.8 1.7
## 9 AD AND Andorra 2001 NA 67770 11.8 2.1
## 10 AD AND Andorra 2002 NA 71046 11.2 2.1
## # … with 5,265 more rows, and 3 more variables: region <chr>, income <chr>,
## # GDP <dbl>
New object china_rise: keep only the rows for China, Germany, Japan, and the United States.
# Keep only the rows for the four economies compared in plot 1.
# %in% tests membership in the whole set at once, replacing the chained
# `==` / `|` comparisons; the same rows are retained.
china_rise <- natl_gdp %>%
  filter(country %in% c("China", "Germany", "Japan", "United States"))
plot 1
# Plot 1: GDP over time for the selected countries, one colour per country,
# drawn as points joined by lines.
plot1 <- ggplot(
  data = china_rise,
  mapping = aes(x = year, y = GDP, color = country)
) +
  geom_point() +
  geom_line() +
  # y-axis label (GDP is stored in trillions of dollars)
  ylab("GDP $ Trillions") +
  # ColorBrewer "Set1" palette for the country colours
  scale_colour_brewer(palette = "Set1") +
  ggtitle("China's Rise to Become the World's Largest Economy")
# Render the chart.
plot1

New object by_region: total GDP summed for each region and year.
# Total GDP (in trillions of dollars) for every region-year combination.
by_region <- natl_gdp %>%
  group_by(region, year) %>%
  # na.rm = TRUE leaves rows with a missing GDP out of each regional sum.
  # .groups = "drop" returns an ungrouped tibble, so no leftover grouping by
  # region leaks into later steps and the regrouping message is not emitted.
  summarise(GDP = sum(GDP, na.rm = TRUE), .groups = "drop")
plot 2 (b)
# Plot 2b: area chart of regional GDP over time, filled by region
# (geom_area stacks the regions by default), on the dark theme.
# No fill scale is set, so the default palette and legend title are used.
plot2b <- ggplot(
  data = by_region,
  mapping = aes(x = year, y = GDP, fill = region)
) +
  # white outlines separate neighbouring areas
  geom_area(color = "white") +
  theme_dark()
# Render the chart.
plot2b
