library(tidyverse)Nations Assignment
Load Libraries
Check the Working Directory
# Report the current working directory. getwd() only reads the directory; it
# does not change it. The text after the call is the rendered console output.
getwd()[1] "/Users/darrenabou/Desktop/Spring 26/Data110"
Read dataset into the Global Environment
# Read nations.csv (resolved relative to the working directory) into `nations`.
# The text after the call is the rendered read_csv() message, not code.
nations<- read_csv("nations.csv")Rows: 5275 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): iso2c, iso3c, country, region, income
dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows to confirm the import looks right.
nations |>
  head(n = 6) # A tibble: 6 × 10
iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AD AND Andorra 1996 NA 64291 10.9 2.8
2 AD AND Andorra 1994 NA 62707 10.9 3.2
3 AD AND Andorra 2003 NA 74783 10.3 2
4 AD AND Andorra 1990 NA 54511 11.9 4.3
5 AD AND Andorra 2009 NA 85474 9.9 1.7
6 AD AND Andorra 2011 NA 82326 NA 1.6
# ℹ 2 more variables: region <chr>, income <chr>
Cleaning the dataset by removing rows with NA values
# Drop every row that has a missing value in any column except `iso2c`.
# NOTE(review): `iso2c` is excluded because Namibia's ISO2 code is the literal
# text "NA", which read_csv() appears to parse as a missing value -- Namibia is
# absent from the country list printed further down. `iso2c` is not used by any
# later step, so nothing downstream needs it to be complete. Confirm against
# nations.csv; the printed row and country counts will change on re-render.
nations_clean <- nations |>
  drop_na(-iso2c)
nations_clean# A tibble: 4,303 × 10
iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AE ARE United… 1991 73037. 1913190 24.6 7.9
2 AE ARE United… 1993 71960. 2127863 22.4 7.3
3 AE ARE United… 2001 83534. 3217865 15.8 5.5
4 AE ARE United… 1992 73154. 2019014 23.5 7.6
5 AE ARE United… 1994 74684. 2238281 21.3 6.9
6 AE ARE United… 2007 75427. 6010100 12.8 4.7
7 AE ARE United… 2004 87844. 3975945 14.2 5.1
8 AE ARE United… 1996 79480. 2467726 19.3 6.4
9 AE ARE United… 2006 82754. 5171255 13.3 4.9
10 AE ARE United… 2000 84975. 3050128 16.4 5.6
# ℹ 4,293 more rows
# ℹ 2 more variables: region <chr>, income <chr>
Creating a new variable consisting of calculated GDP
# Add total GDP, expressed in trillions (per-capita GDP times population,
# divided by 10^12), then order the rows from the largest GDP to the smallest.
nations_GDP <-
  nations_clean |>
  mutate(
    GDP = (gdp_percap * population) / 10^12
  ) |>
  arrange(
    desc(GDP)
  )
nations_GDP # A tibble: 4,303 × 11
iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 CN CHN China 2014 13255. 1364270000 12.4 5.9
2 US USA United… 2014 54398. 318907401 12.5 3.7
3 US USA United… 2013 52660. 316427395 12.4 3.8
4 CN CHN China 2013 12219. 1357380000 12.1 6.3
5 US USA United… 2012 51433. 314102623 12.6 3.9
6 US USA United… 2011 49782. 311718857 12.7 4
7 CN CHN China 2012 11220. 1350695000 12.1 6.9
8 US USA United… 2010 48374. 309346863 13 4.1
9 US USA United… 2008 48401. 304093966 14 4.3
10 US USA United… 2007 48062. 301231207 14.3 4.3
# ℹ 4,293 more rows
# ℹ 3 more variables: region <chr>, income <chr>, GDP <dbl>
Exploratory Data Analysis to get insights on the selected countries
# Tally how many rows (country-year observations) each country contributes.
nations_GDP |>
  count(country, name = "count") # A tibble: 181 × 2
country count
<chr> <int>
1 Afghanistan 13
2 Albania 25
3 Algeria 25
4 Angola 25
5 Antigua and Barbuda 25
6 Armenia 25
7 Australia 25
8 Austria 25
9 Azerbaijan 25
10 Bahamas, The 25
# ℹ 171 more rows
# List each country name once, in order of first appearance in nations_GDP.
# The text after the call is the start of the rendered console output.
unique(nations_GDP$country) [1] "China" "United States"
[3] "India" "Japan"
[5] "Germany" "Russian Federation"
[7] "Brazil" "Indonesia"
[9] "France" "United Kingdom"
[11] "Mexico" "Italy"
[13] "Korea, Rep." "Saudi Arabia"
[15] "Canada" "Spain"
[17] "Turkey" "Iran, Islamic Rep."
[19] "Australia" "Thailand"
[21] "Nigeria" "Poland"
[23] "Egypt, Arab Rep." "Pakistan"
[25] "Netherlands" "Malaysia"
[27] "South Africa" "Philippines"
[29] "Colombia" "United Arab Emirates"
[31] "Algeria" "Venezuela, RB"
[33] "Iraq" "Vietnam"
[35] "Bangladesh" "Belgium"
[37] "Switzerland" "Singapore"
[39] "Kazakhstan" "Sweden"
[41] "Austria" "Romania"
[43] "Ukraine" "Chile"
[45] "Peru" "Greece"
[47] "Norway" "Czech Republic"
[49] "Qatar" "Portugal"
[51] "Israel" "Kuwait"
[53] "Denmark" "Morocco"
[55] "Hungary" "Cuba"
[57] "Sri Lanka" "Ireland"
[59] "Finland" "Ecuador"
[61] "Libya" "Angola"
[63] "Belarus" "Uzbekistan"
[65] "New Zealand" "Azerbaijan"
[67] "Oman" "Sudan"
[69] "Slovak Republic" "Ethiopia"
[71] "Dominican Republic" "Kenya"
[73] "Tanzania" "Tunisia"
[75] "Bulgaria" "Guatemala"
[77] "Ghana" "Yemen, Rep."
[79] "Serbia" "Croatia"
[81] "Turkmenistan" "Panama"
[83] "Lithuania" "Jordan"
[85] "Lebanon" "Cote d'Ivoire"
[87] "Uruguay" "Costa Rica"
[89] "Bolivia" "Cameroon"
[91] "Uganda" "Nepal"
[93] "Slovenia" "Bahrain"
[95] "Afghanistan" "Zambia"
[97] "Paraguay" "Congo, Dem. Rep."
[99] "Luxembourg" "El Salvador"
[101] "Cambodia" "Latvia"
[103] "Trinidad and Tobago" "Bosnia and Herzegovina"
[105] "Mali" "Honduras"
[107] "Cyprus" "South Sudan"
[109] "Estonia" "Lao PDR"
[111] "Botswana" "Mongolia"
[113] "Georgia" "Senegal"
[115] "Madagascar" "Gabon"
[117] "Albania" "Mozambique"
[119] "Brunei Darussalam" "Nicaragua"
[121] "Equatorial Guinea" "Chad"
[123] "Burkina Faso" "Congo, Rep."
[125] "Macedonia, FYR" "Zimbabwe"
[127] "Armenia" "Jamaica"
[129] "Mauritius" "Tajikistan"
[131] "Benin" "Papua New Guinea"
[133] "West Bank and Gaza" "Malawi"
[135] "Kyrgyz Republic" "Rwanda"
[137] "Haiti" "Niger"
[139] "Moldova" "Mauritania"
[141] "Guinea" "Iceland"
[143] "Sierra Leone" "Malta"
[145] "Swaziland" "Togo"
[147] "Montenegro" "Bahamas, The"
[149] "Suriname" "Burundi"
[151] "Fiji" "Eritrea"
[153] "Bhutan" "Lesotho"
[155] "Guyana" "Maldives"
[157] "Barbados" "Central African Republic"
[159] "Liberia" "Gambia, The"
[161] "Belize" "Djibouti"
[163] "Timor-Leste" "Guinea-Bissau"
[165] "Seychelles" "Antigua and Barbuda"
[167] "St. Lucia" "Grenada"
[169] "Solomon Islands" "St. Vincent and the Grenadines"
[171] "Samoa" "Comoros"
[173] "Vanuatu" "St. Kitts and Nevis"
[175] "Dominica" "Sao Tome and Principe"
[177] "Tonga" "Micronesia, Fed. Sts."
[179] "Palau" "Kiribati"
[181] "Marshall Islands"
selected countries
# Keep only the four African economies compared in the charts.
nation_Africa <- nations_GDP |>
  filter(country %in% c("South Africa", "Cameroon", "Nigeria", "Morocco"))
library(RColorBrewer)
library(plotly)
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
# Line chart of GDP over time with one coloured line (plus points) per country,
# all drawn on a shared pair of axes.
African_chart <- ggplot(
  nation_Africa,
  aes(x = year, y = GDP, color = country, group = country)
) +
  geom_line(linewidth = 1.5) +
  geom_point() +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "Africa's Largest Economy",
    x = "year",
    y = "GDP( $trillions)"
  ) +
  theme_minimal(base_size = 12)
# Convert the static ggplot into an interactive plotly widget.
ggplotly(African_chart)
library(RColorBrewer)
library(plotly)
# Same GDP-over-time chart, split into one panel per country, with the x-axis
# tick labels rotated 45 degrees.
African_chart <- nation_Africa |>
  ggplot(
    mapping = aes(x = year, y = GDP, colour = country, group = country)
  ) +
  geom_line(linewidth = 1.5) +
  geom_point() +
  scale_colour_brewer(palette = "Set1") +
  labs(title = "Africa's Largest Economy", x = "year", y = "GDP( $trillions)") +
  theme_minimal(base_size = 12) +
  facet_wrap(vars(country)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplotly(African_chart)
When we talk about Africa’s economic story, four countries inevitably dominate the conversation: Nigeria, South Africa, Morocco, and Cameroon. This is why I chose them: to see which country was really the leading one. These two charts bring insights to a new dimension, and what they reveal is staggering.
The first chart places all four countries on the same canvas, and the contrast is immediately striking. For most of the 1990s, the four nations tracked relatively closely together. However, Nigeria then breaks away from the pack entirely. Its GDP line bends sharply upward, eventually surpassing $1 trillion by 2014. On the other hand, South Africa, long considered the continent’s most industrialized economy, grows steadily but is ultimately overtaken. Cameroon, meanwhile, despite its potential, remains frustratingly flat near the bottom throughout the entire 25-year period.