# Setup ----
# Load the tidyverse (dplyr, readr, ggplot2, ...) used throughout this script.
# The session that produced the original results reported tidyverse 2.0.0
# (dplyr 1.1.4, readr 2.1.6, ggplot2 4.0.1).
library(tidyverse)

# The data files are read with relative paths, so run this script with the
# working directory set to the folder that contains nations.csv and
# nations_new.csv (for example, by opening the project from that folder).
# A hard-coded setwd() to one user's Desktop only works on that machine, so
# it is deliberately not used here.
# Load data ----
# nations.csv -- when last read: 5275 rows, 10 columns
#   chr: iso2c, iso3c, country, region, income
#   dbl: year, gdp_percap, population, birth_rate, neonat_mortal_rate
nations <- read_csv("nations.csv")

# nations_new.csv -- when last read: 13184 rows, 9 columns
#   chr: country, iso3c, region, wb_income
#   dbl: year, birth_rate, neonatal_death_rate, population, gdp_per_cap
# Note that some column names differ from nations.csv
# (gdp_per_cap vs gdp_percap, wb_income vs income).
newnations <- read_csv("nations_new.csv")
# Preview the first six rows of each dataset to check the import.
# In the original run both previews contained NA values in the GDP-per-capita
# column (Andorra in nations, Aruba 1960-1965 in newnations), which is why
# rows with missing values are filtered out before GDP is computed.
head(nations)
head(newnations)
# Total GDP for the original dataset ----
# Keep only rows where both inputs to the GDP calculation are present.
nations_nona <- nations |>
  filter(!is.na(gdp_percap), !is.na(population))

# gdp = GDP per capita x population, rescaled to trillions (divided by 1e12).
nationsgdp <- nations_nona |>
  mutate(gdp = gdp_percap * population / 1e12)

# Check the result: the tibble now has an extra `gdp` column (11 in total).
head(nationsgdp)
# Total GDP for the newer dataset ----
# Same steps as for `nations`, but this file names the column `gdp_per_cap`.
# Keep only rows where both inputs to the GDP calculation are present.
newnations_nona <- newnations |>
  filter(!is.na(gdp_per_cap), !is.na(population))

# gdp = GDP per capita x population, rescaled to trillions (divided by 1e12).
newnationsgdp <- newnations_nona |>
  mutate(gdp = gdp_per_cap * population / 1e12)

# Check the result: the tibble now has an extra `gdp` column (10 in total).
head(newnationsgdp)
# Subset to the four countries shown in the line chart ----
# `%in%` tests membership in the set in one step and never returns NA, so it
# replaces the chain of `country == ... | country == ...` comparisons.
fournations <- nationsgdp |>
  filter(country %in% c("France", "United States", "United Kingdom", "China"))

head(fournations)
# Line chart of GDP over time, one coloured series per country ----
# Points are drawn first and the connecting lines on top of them.
# NOTE(review): the title says "P5", but `fournations` is filtered to four
# countries -- confirm the intended wording.
plot1 <- ggplot(
  fournations,
  aes(x = year, y = gdp, group = country, colour = country)
) +
  geom_point() +
  geom_line() +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "GDP Over Time for P5 Countries",
    x = "Years (1990-2015)",
    y = "GDP (in $ Trillions)",
    caption = "Source: Class Data Source"
  ) +
  theme_bw()
# Display the four-country line chart.
plot1

# GDP by region and year ----
# Sum GDP within each region-year, then express each region's GDP as a share
# (a fraction between 0 and 1) of that year's total across all regions.
nationsregion <- newnationsgdp |>
  group_by(region, year) |>
  summarise(gdp = sum(gdp, na.rm = TRUE), .groups = "drop") |>
  # Regroup by year so that sum(gdp) is the all-region total for that year.
  # Keeping summarise()'s default grouping (by region) would instead divide by
  # the region's GDP summed over every year.
  group_by(year) |>
  mutate(percentage = gdp / sum(gdp)) |>
  ungroup()

# One row per region-year (448 rows across 7 regions in the original run),
# with columns region, year, gdp, percentage.
nationsregion
# Stacked area chart of GDP by region ----
plot2 <- nationsregion |>
  ggplot(aes(x = year, y = gdp, fill = region)) +
  # `linewidth` replaces `size` for lines, which was deprecated in
  # ggplot2 3.4.0; it sets the width of the white outline between areas.
  geom_area(alpha = 1, linewidth = 0.25, colour = "white") +
  theme_bw() +
  labs(
    title = "GDP Over Time by Region",
    # The regional data starts in 1960, so the axis label no longer claims
    # the 1990-2015 range.
    x = "Year",
    y = "GDP (in $ Trillions)",
    caption = "Source: Class Data Source"
  ) +
  scale_fill_brewer(palette = "Set2")

# Display the stacked area chart.
plot2

# I used this site for reference when I got lost:
# https://r-graph-gallery.com/136-stacked-area-chart.html