library(tidyverse)
library(dplyr)
library(RColorBrewer)
library(plotly)
# Nations HW ----
# Libraries ----
# Datasets ----
# Point the session at the folder that holds the course datasets.
# NOTE(review): this absolute path only exists on one machine; consider an
# RStudio project or a relative path so the script runs elsewhere.
setwd("C:/Users/desir_7411ic3/Desktop/Montgomery College/DATA110/DATASETS-20240830T194929Z-001/DATASETS")

# Load the raw nations table; readr infers the column types from the file.
nations_orig <- read_csv("nations.csv")
Rows: 5275 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): iso2c, iso3c, country, region, income
dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the raw data to check that it loaded as expected.
head(nations_orig)
# A tibble: 6 × 10
iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AD AND Andorra 1996 NA 64291 10.9 2.8
2 AD AND Andorra 1994 NA 62707 10.9 3.2
3 AD AND Andorra 2003 NA 74783 10.3 2
4 AD AND Andorra 1990 NA 54511 11.9 4.3
5 AD AND Andorra 2009 NA 85474 9.9 1.7
6 AD AND Andorra 2011 NA 82326 NA 1.6
# ℹ 2 more variables: region <chr>, income <chr>
# Per-column summary of the raw data (ranges, quartiles and NA counts).
summary(nations_orig)
iso2c iso3c country year
Length:5275 Length:5275 Length:5275 Min. :1990
Class :character Class :character Class :character 1st Qu.:1996
Mode :character Mode :character Mode :character Median :2002
Mean :2002
3rd Qu.:2008
Max. :2014
gdp_percap population birth_rate neonat_mortal_rate
Min. : 239.7 Min. :9.004e+03 Min. : 6.90 Min. : 0.70
1st Qu.: 2263.6 1st Qu.:7.175e+05 1st Qu.:13.40 1st Qu.: 6.70
Median : 6563.2 Median :5.303e+06 Median :21.60 Median :15.00
Mean : 12788.8 Mean :2.958e+07 Mean :24.16 Mean :19.40
3rd Qu.: 17195.0 3rd Qu.:1.757e+07 3rd Qu.:33.88 3rd Qu.:29.48
Max. :141968.1 Max. :1.364e+09 Max. :55.12 Max. :73.10
NA's :766 NA's :14 NA's :295 NA's :525
region income
Length:5275 Length:5275
Class :character Class :character
Mode :character Mode :character
# GDP column ----
# Add a total-GDP column expressed in trillions of dollars:
# per-capita GDP times population, divided by 1e12.
# Rows where either input is NA get an NA gdp.
dataset <- nations_orig %>%
  # Refer to the columns by name (data masking) rather than nations_orig$...,
  # so the expression always uses the data flowing through the pipe.
  mutate(gdp = (gdp_percap * population) / 1e12)
# Summarise again to confirm the new gdp column was added.
summary(dataset)
iso2c iso3c country year
Length:5275 Length:5275 Length:5275 Min. :1990
Class :character Class :character Class :character 1st Qu.:1996
Mode :character Mode :character Mode :character Median :2002
Mean :2002
3rd Qu.:2008
Max. :2014
gdp_percap population birth_rate neonat_mortal_rate
Min. : 239.7 Min. :9.004e+03 Min. : 6.90 Min. : 0.70
1st Qu.: 2263.6 1st Qu.:7.175e+05 1st Qu.:13.40 1st Qu.: 6.70
Median : 6563.2 Median :5.303e+06 Median :21.60 Median :15.00
Mean : 12788.8 Mean :2.958e+07 Mean :24.16 Mean :19.40
3rd Qu.: 17195.0 3rd Qu.:1.757e+07 3rd Qu.:33.88 3rd Qu.:29.48
Max. :141968.1 Max. :1.364e+09 Max. :55.12 Max. :73.10
NA's :766 NA's :14 NA's :295 NA's :525
region income gdp
Length:5275 Length:5275 Min. : 0.0000
Class :character Class :character 1st Qu.: 0.0077
Mode :character Mode :character Median : 0.0324
Mean : 0.3259
3rd Qu.: 0.1849
Max. :18.0829
NA's :766
# First chart ----
# Create the dataset for the first chart
# Keep only the four countries compared in the first chart.
chart_of_4 <- dataset %>%
  filter(country %in% c("China", "Germany", "Japan", "United States"))
# Preview the first rows of the full `dataset` table (not the filtered
# `chart_of_4` subset).
head(dataset)
# A tibble: 6 × 11
iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AD AND Andorra 1996 NA 64291 10.9 2.8
2 AD AND Andorra 1994 NA 62707 10.9 3.2
3 AD AND Andorra 2003 NA 74783 10.3 2
4 AD AND Andorra 1990 NA 54511 11.9 4.3
5 AD AND Andorra 2009 NA 85474 9.9 1.7
6 AD AND Andorra 2011 NA 82326 NA 1.6
# ℹ 3 more variables: region <chr>, income <chr>, gdp <dbl>
# Plot 1 design ----
# Line-and-point chart of GDP over time, one coloured series per country.
plot1 <- chart_of_4 |>
  ggplot(aes(x = year, y = gdp, color = country)) + # year vs GDP, by country
  geom_point() + # one point per country-year
  geom_line() + # connect each country's points
  labs(
    x = "Year",
    y = "GDP ($ trillion)",
    title = "China's Rise to Become the Largest Economy",
    caption = "From World Bank Data"
  ) +
  theme_bw() + # white background
  theme(
    panel.border = element_blank() # remove the border around the panel
  ) +
  # The scale must be added outside theme(); it sets the country colours
  # from the ColorBrewer "Set1" palette.
  scale_color_brewer(palette = "Set1")

# Display the plot.
plot1
# Second chart ----
# Plot 2 dataset
# Total GDP per region and year; missing country GDP values are skipped
# rather than turning the whole regional sum into NA.
data2 <- dataset %>%
  group_by(region, year) %>%
  # .groups = "drop" returns an ungrouped tibble, so later steps are not
  # silently grouped by region, and it silences the regrouping message.
  summarise(sum_GDP = sum(gdp, na.rm = TRUE), .groups = "drop")
# Check the aggregated table: one row per region-year with its summed GDP.
summary(data2)
region year sum_GDP
Length:175 Min. :1990 Min. : 0.7865
Class :character 1st Qu.:1996 1st Qu.: 2.8400
Mode :character Median :2002 Median : 6.5393
Mean :2002 Mean : 8.3982
3rd Qu.:2008 3rd Qu.:11.6291
Max. :2014 Max. :32.5209
# Plot 2 design ----
# Stacked area chart of summed GDP over time, one filled band per region.
plot2 <- data2 |>
  ggplot(aes(x = year, y = sum_GDP, fill = region)) +
  geom_area(color = "white", linewidth = 0.2) + # thin white band outlines
  scale_fill_brewer(palette = "Set2") + # ColorBrewer "Set2" region fills
  labs(
    x = "year",
    y = "GDP ($ trillion)",
    title = "GDP by World Bank Region",
    caption = "From World Bank Data"
  ) +
  theme_bw() + # white background
  theme(
    panel.border = element_blank() # remove the border around the panel
  )

# Display the plot.
plot2