library(ggplot2)
library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v tibble 2.1.3 v dplyr 0.8.4
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## v purrr 0.3.3
## -- Conflicts ----------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
Get Working Directory:
# Print the working directory of the current R session.
getwd()
## [1] "C:/Users/Maggie/Documents"
# Import the nations CSV as a tibble; readr guesses the column types and
# reports them in the message printed below.
# NOTE(review): this is an absolute, machine-specific path -- the script only
# runs where this exact file location exists.
nations_dataset <- readr::read_csv(
  file = "C:/Users/Maggie/Downloads/nations.csv"
)
## Parsed with column specification:
## cols(
## iso2c = col_character(),
## iso3c = col_character(),
## country = col_character(),
## year = col_double(),
## gdp_percap = col_double(),
## population = col_double(),
## birth_rate = col_double(),
## neonat_mortal_rate = col_double(),
## region = col_character(),
## income = col_character()
## )
View Dataset:
# Open the imported data in the data viewer (lowercase `view()`; presumably
# the tibble version attached through tidyverse -- confirm).
view(nations_dataset)
# Print a compact summary: dimensions, column names, types and first values.
str(nations_dataset)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 5275 obs. of 10 variables:
## $ iso2c : chr "AD" "AD" "AD" "AD" ...
## $ iso3c : chr "AND" "AND" "AND" "AND" ...
## $ country : chr "Andorra" "Andorra" "Andorra" "Andorra" ...
## $ year : num 1996 1994 2003 1990 2009 ...
## $ gdp_percap : num NA NA NA NA NA NA NA NA NA NA ...
## $ population : num 64291 62707 74783 54511 85474 ...
## $ birth_rate : num 10.9 10.9 10.3 11.9 9.9 NA 10.9 9.8 11.8 11.2 ...
## $ neonat_mortal_rate: num 2.8 3.2 2 4.3 1.7 1.6 2 1.7 2.1 2.1 ...
## $ region : chr "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" ...
## $ income : chr "High income" "High income" "High income" "High income" ...
## - attr(*, "spec")=
## .. cols(
## .. iso2c = col_character(),
## .. iso3c = col_character(),
## .. country = col_character(),
## .. year = col_double(),
## .. gdp_percap = col_double(),
## .. population = col_double(),
## .. birth_rate = col_double(),
## .. neonat_mortal_rate = col_double(),
## .. region = col_character(),
## .. income = col_character()
## .. )
New Variable (GDP in $ trillion):
# Append total GDP as an extra column: per-capita GDP times population,
# divided by 1e12 (plotted later under the label "GDP ($ trillion)").
# NOTE(review): the new column is not given a name, so dplyr labels it with
# the expression text (see the column listing further down); consider naming
# it explicitly.
With_GDP <- nations_dataset %>%
  mutate(gdp_percap * population / 1000000000000)
# Inspect the result in the data viewer.
view(With_GDP)
# List every distinct country name available for filtering.
unique(With_GDP[["country"]])
## [1] "Andorra" "United Arab Emirates"
## [3] "Afghanistan" "Antigua and Barbuda"
## [5] "Albania" "Armenia"
## [7] "Angola" "Argentina"
## [9] "American Samoa" "Austria"
## [11] "Australia" "Aruba"
## [13] "Azerbaijan" "Bosnia and Herzegovina"
## [15] "Barbados" "Bangladesh"
## [17] "Belgium" "Burkina Faso"
## [19] "Bulgaria" "Bahrain"
## [21] "Burundi" "Benin"
## [23] "Bermuda" "Brunei Darussalam"
## [25] "Bolivia" "Brazil"
## [27] "Bahamas, The" "Bhutan"
## [29] "Botswana" "Belarus"
## [31] "Belize" "Canada"
## [33] "Congo, Dem. Rep." "Central African Republic"
## [35] "Congo, Rep." "Switzerland"
## [37] "Cote d'Ivoire" "Chile"
## [39] "Cameroon" "China"
## [41] "Colombia" "Costa Rica"
## [43] "Cuba" "Curacao"
## [45] "Cyprus" "Czech Republic"
## [47] "Germany" "Djibouti"
## [49] "Denmark" "Dominica"
## [51] "Dominican Republic" "Algeria"
## [53] "Ecuador" "Estonia"
## [55] "Egypt, Arab Rep." "Eritrea"
## [57] "Spain" "Ethiopia"
## [59] "Finland" "Fiji"
## [61] "Micronesia, Fed. Sts." "France"
## [63] "Gabon" "United Kingdom"
## [65] "Grenada" "Georgia"
## [67] "Ghana" "Gibraltar"
## [69] "Greenland" "Gambia, The"
## [71] "Guinea" "Equatorial Guinea"
## [73] "Greece" "Guatemala"
## [75] "Guam" "Guinea-Bissau"
## [77] "Guyana" "Hong Kong SAR, China"
## [79] "Honduras" "Croatia"
## [81] "Haiti" "Hungary"
## [83] "Indonesia" "Ireland"
## [85] "Israel" "Isle of Man"
## [87] "India" "Iraq"
## [89] "Iran, Islamic Rep." "Iceland"
## [91] "Italy" "Channel Islands"
## [93] "Jamaica" "Jordan"
## [95] "Japan" "Kenya"
## [97] "Kyrgyz Republic" "Cambodia"
## [99] "Kiribati" "Comoros"
## [101] "St. Kitts and Nevis" "Korea, Rep."
## [103] "Kuwait" "Cayman Islands"
## [105] "Kazakhstan" "Lao PDR"
## [107] "Lebanon" "St. Lucia"
## [109] "Liechtenstein" "Sri Lanka"
## [111] "Liberia" "Lesotho"
## [113] "Lithuania" "Luxembourg"
## [115] "Latvia" "Libya"
## [117] "Morocco" "Monaco"
## [119] "Moldova" "Montenegro"
## [121] "St. Martin (French part)" "Madagascar"
## [123] "Marshall Islands" "Macedonia, FYR"
## [125] "Mali" "Myanmar"
## [127] "Mongolia" "Macao SAR, China"
## [129] "Northern Mariana Islands" "Mauritania"
## [131] "Malta" "Mauritius"
## [133] "Maldives" "Malawi"
## [135] "Mexico" "Malaysia"
## [137] "Mozambique" "Namibia"
## [139] "New Caledonia" "Niger"
## [141] "Nigeria" "Nicaragua"
## [143] "Netherlands" "Norway"
## [145] "Nepal" "New Zealand"
## [147] "Oman" "Panama"
## [149] "Peru" "French Polynesia"
## [151] "Papua New Guinea" "Philippines"
## [153] "Pakistan" "Poland"
## [155] "Puerto Rico" "West Bank and Gaza"
## [157] "Portugal" "Palau"
## [159] "Paraguay" "Qatar"
## [161] "Romania" "Serbia"
## [163] "Russian Federation" "Rwanda"
## [165] "Saudi Arabia" "Solomon Islands"
## [167] "Seychelles" "Sudan"
## [169] "Sweden" "Singapore"
## [171] "Slovenia" "Slovak Republic"
## [173] "Sierra Leone" "San Marino"
## [175] "Senegal" "Somalia"
## [177] "Suriname" "South Sudan"
## [179] "Sao Tome and Principe" "El Salvador"
## [181] "Sint Maarten (Dutch part)" "Syrian Arab Republic"
## [183] "Swaziland" "Turks and Caicos Islands"
## [185] "Chad" "Togo"
## [187] "Thailand" "Tajikistan"
## [189] "Timor-Leste" "Turkmenistan"
## [191] "Tunisia" "Tonga"
## [193] "Turkey" "Trinidad and Tobago"
## [195] "Tuvalu" "Tanzania"
## [197] "Ukraine" "Uganda"
## [199] "United States" "Uruguay"
## [201] "Uzbekistan" "St. Vincent and the Grenadines"
## [203] "Venezuela, RB" "Virgin Islands (U.S.)"
## [205] "Vietnam" "Vanuatu"
## [207] "Samoa" "Yemen, Rep."
## [209] "South Africa" "Zambia"
## [211] "Zimbabwe"
Filter Countries:
# Keep only the rows for the four economies compared in Chart #1.
filtered_countries <- With_GDP %>%
  filter(country %in% c("China", "Germany", "Japan", "United States"))
# Inspect the subset in the data viewer.
view(filtered_countries)
Look Up Column Names:
# Show the structure of the filtered data, including its column names and
# types, before mapping columns to plot aesthetics.
str(filtered_countries)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 100 obs. of 11 variables:
## $ iso2c : chr "CN" "CN" "CN" "CN" ...
## $ iso3c : chr "CHN" "CHN" "CHN" "CHN" ...
## $ country : chr "China" "China" "China" "China" ...
## $ year : num 1992 2005 2000 1991 2013 ...
## $ gdp_percap : num 1260 5053 2915 1091 12219 ...
## $ population : num 1.16e+09 1.30e+09 1.26e+09 1.15e+09 1.36e+09 ...
## $ birth_rate : num 18.3 12.4 14 19.7 12.1 ...
## $ neonat_mortal_rate : num 29.4 14 21.2 29.7 6.3 22.2 5.9 17.1 15.5 28.8 ...
## $ region : chr "East Asia & Pacific" "East Asia & Pacific" "East Asia & Pacific" "East Asia & Pacific" ...
## $ income : chr "Upper middle income" "Upper middle income" "Upper middle income" "Upper middle income" ...
## $ gdp_percap * population/1e+12: num 1.47 6.59 3.68 1.26 16.59 ...
# List just the column names of the filtered data.
names(filtered_countries)
## [1] "iso2c" "iso3c"
## [3] "country" "year"
## [5] "gdp_percap" "population"
## [7] "birth_rate" "neonat_mortal_rate"
## [9] "region" "income"
## [11] "gdp_percap * population/1e+12"
Chart Template for Chart #1:
# Base plot for Chart #1: year on the x axis, total GDP (per-capita GDP times
# population, divided by 1e12) on the y axis, one colour per country.
largest_economy_chart_template <- ggplot(
  data = filtered_countries,
  mapping = aes(
    x = year,
    y = gdp_percap * population / 1e+12,
    colour = country
  )
) +
  labs(x = "year", y = "GDP ($ trillion)")
# Print the template on its own; no geom layer has been added at this point.
largest_economy_chart_template

Chart #1:
# Chart #1: add points and connecting lines per country to the template, set
# the title, and colour the countries with the ColorBrewer "Set1" palette.
largest_economy_chart <- largest_economy_chart_template +
  geom_point() +
  geom_line() +
  labs(title = "China's Rise to Become the Largest Economy") +
  scale_colour_brewer(palette = "Set1")
# Render the finished chart.
largest_economy_chart

group_by() and summarise() Functions:
# Total GDP per region and year: per-capita GDP times population, divided by
# 1e12, summed within each (region, year) group.
# `na.rm = TRUE` skips rows where the product is NA so that one missing value
# does not turn the whole regional total into NA. `TRUE` is spelled out
# because the shorthand `T` is an ordinary variable that can be reassigned.
# The result is still grouped by `region` (see "# Groups: region" in the
# printed output); the total is stored in a column called `sum`, which is the
# name Chart #2 maps to its y axis.
GDP_by_Region_Chart <- With_GDP %>%
  group_by(region, year) %>%
  summarise(sum = sum(gdp_percap * population / 1e+12, na.rm = TRUE))
# Print the summarised tibble.
GDP_by_Region_Chart
## # A tibble: 175 x 3
## # Groups: region [7]
## region year sum
## <chr> <dbl> <dbl>
## 1 East Asia & Pacific 1990 5.52
## 2 East Asia & Pacific 1991 6.03
## 3 East Asia & Pacific 1992 6.50
## 4 East Asia & Pacific 1993 7.04
## 5 East Asia & Pacific 1994 7.64
## 6 East Asia & Pacific 1995 8.29
## 7 East Asia & Pacific 1996 8.96
## 8 East Asia & Pacific 1997 9.55
## 9 East Asia & Pacific 1998 9.60
## 10 East Asia & Pacific 1999 10.1
## # ... with 165 more rows
Chart #2:
# Chart #2: area chart of the summed GDP per year, filled by region, with
# white outlines between the areas and the ColorBrewer "Set2" fill palette.
# The stray trailing comma (an empty argument) inside aes() has been removed.
# NOTE(review): the y label here has no space before "(" whereas Chart #1
# uses "GDP ($ trillion)" -- confirm which spelling is intended.
ggplot(GDP_by_Region_Chart, aes(x = year, y = sum, fill = region)) +
  xlab("year") +
  ylab("GDP($ trillion)") +
  geom_area(color = "white") +
  scale_fill_brewer(palette = "Set2") +
  ggtitle("GDP by World Bank Region")
