Get some information about the nations dataset

# Inspect the imported data: str() prints the dimensions, the type of each
# column, and the first few values of every column.
str(nations)            # structure of the nations dataset -- most detailed view
## spec_tbl_df [5,275 x 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ iso2c             : chr [1:5275] "AD" "AD" "AD" "AD" ...
##  $ iso3c             : chr [1:5275] "AND" "AND" "AND" "AND" ...
##  $ country           : chr [1:5275] "Andorra" "Andorra" "Andorra" "Andorra" ...
##  $ year              : num [1:5275] 1996 1994 2003 1990 2009 ...
##  $ gdp_percap        : num [1:5275] NA NA NA NA NA NA NA NA NA NA ...
##  $ population        : num [1:5275] 64291 62707 74783 54511 85474 ...
##  $ birth_rate        : num [1:5275] 10.9 10.9 10.3 11.9 9.9 NA 10.9 9.8 11.8 11.2 ...
##  $ neonat_mortal_rate: num [1:5275] 2.8 3.2 2 4.3 1.7 1.6 2 1.7 2.1 2.1 ...
##  $ region            : chr [1:5275] "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" ...
##  $ income            : chr [1:5275] "High income" "High income" "High income" "High income" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   iso2c = col_character(),
##   ..   iso3c = col_character(),
##   ..   country = col_character(),
##   ..   year = col_double(),
##   ..   gdp_percap = col_double(),
##   ..   population = col_double(),
##   ..   birth_rate = col_double(),
##   ..   neonat_mortal_rate = col_double(),
##   ..   region = col_character(),
##   ..   income = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Other ways to inspect the data; the commented-out calls are optional checks.
#typeof(nations)         # storage type of the nations dataset - list
#attributes(nations)     # attributes of the nations dataset

#tail(nations)           # last six rows of the nations dataset
dim(nations)            # number of rows and columns of the nations dataset
## [1] 5275   10
#nrow(nations)           # number of rows of the nations dataset
#ncol(nations)           # number of columns of the nations dataset
names(nations)          # column names of the nations dataset
##  [1] "iso2c"              "iso3c"              "country"           
##  [4] "year"               "gdp_percap"         "population"        
##  [7] "birth_rate"         "neonat_mortal_rate" "region"            
## [10] "income"
#sapply(nations, class)  # the class of each column of nations dataset

Clean up the datasets

Remove the rows with NA values in gdp_percap and year

# Keep only the rows where both gdp_percap and year are present; filter()
# combines comma-separated conditions with a logical AND.
nations_nona <- filter(nations, !is.na(gdp_percap), !is.na(year))

# Preview the first rows of the cleaned data.
head(nations_nona)
## # A tibble: 6 x 10
##   iso2c iso3c country     year gdp_percap population birth_rate neonat_mortal_r~
##   <chr> <chr> <chr>      <dbl>      <dbl>      <dbl>      <dbl>            <dbl>
## 1 AE    ARE   United Ar~  1991     73037.    1913190       24.6              7.9
## 2 AE    ARE   United Ar~  1993     71960.    2127863       22.4              7.3
## 3 AE    ARE   United Ar~  2001     83534.    3217865       15.8              5.5
## 4 AE    ARE   United Ar~  1992     73154.    2019014       23.5              7.6
## 5 AE    ARE   United Ar~  1994     74684.    2238281       21.3              6.9
## 6 AE    ARE   United Ar~  2007     75427.    6010100       12.8              4.7
## # ... with 2 more variables: region <chr>, income <chr>
# Check the result
#str(nations_nona) 
#dim(nations_nona)

Compute and Add a New Variable to a Dataframe in R with mutate() - get the following sample code from https://www.marsja.se/how-to-add-a-column-to-dataframe-in-r-with-tibble-dplyr/

First, create a new variable in the data, using mutate() from dplyr, that gives the GDP of each country in trillions of dollars: multiply gdp_percap by population and divide by a trillion.

# Insert new column with mutate - sample code
#dataf <- dataf %>% 
#  mutate(DepressionIndex = mean(c_across(Depr1:Depr5))) %>%
#  head()

# Add a GDP column holding each country's total GDP in trillions of dollars:
# per-capita GDP times population, divided by 10^12.
# The pipeline must end at mutate(); a trailing %>% would continue into the
# next expression and overwrite nations_nona with that call's result.
nations_nona <- nations_nona %>%
  mutate(GDP = (gdp_percap * population) / 10^12)

# Check the result
str(nations_nona)
## spec_tbl_df [4,509 x 11] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ iso2c             : chr [1:4509] "AE" "AE" "AE" "AE" ...
##  $ iso3c             : chr [1:4509] "ARE" "ARE" "ARE" "ARE" ...
##  $ country           : chr [1:4509] "United Arab Emirates" "United Arab Emirates" "United Arab Emirates" "United Arab Emirates" ...
##  $ year              : num [1:4509] 1991 1993 2001 1992 1994 ...
##  $ gdp_percap        : num [1:4509] 73037 71960 83534 73154 74684 ...
##  $ population        : num [1:4509] 1913190 2127863 3217865 2019014 2238281 ...
##  $ birth_rate        : num [1:4509] 24.6 22.4 15.8 23.5 21.3 ...
##  $ neonat_mortal_rate: num [1:4509] 7.9 7.3 5.5 7.6 6.9 4.7 5.1 6.4 4.9 5.6 ...
##  $ region            : chr [1:4509] "Middle East & North Africa" "Middle East & North Africa" "Middle East & North Africa" "Middle East & North Africa" ...
##  $ income            : chr [1:4509] "High income" "High income" "High income" "High income" ...
##  $ GDP               : num [1:4509] 0.14 0.153 0.269 0.148 0.167 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   iso2c = col_character(),
##   ..   iso3c = col_character(),
##   ..   country = col_character(),
##   ..   year = col_double(),
##   ..   gdp_percap = col_double(),
##   ..   population = col_double(),
##   ..   birth_rate = col_double(),
##   ..   neonat_mortal_rate = col_double(),
##   ..   region = col_character(),
##   ..   income = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
#dim(nations_nona)

Draw first chart

Map variables in the data onto the X and Y axes and change the axes labels and theme

# Change the theme: map year to x and gdp_percap to y, set the axis labels, and
# apply the minimal theme. No geom is added here, so only the themed axes are
# drawn.
# Alternative data source: ggplot(nations_nona, aes(x = year, y = gdp_percap))
ggplot(nations, aes(x = year, y = gdp_percap)) +
  xlab("year") +
  # gdp_percap is dollars per person, not total GDP in trillions of dollars.
  ylab("GDP per capita ($)") +
  theme_minimal(base_size = 12)

# Include all the related countries: base plot of gdp_percap against year with
# a title, a source caption, axis labels, and the minimal theme.
# Alternative data source: ggplot(nations_nona, aes(x = year, y = gdp_percap))
p1 <- ggplot(nations, aes(x = year, y = gdp_percap)) +
  labs(
    title = "GDP VERSUS YEAR FOR EACH COUNTRY",
    caption = "Source: The World Bank and Siadi"
  ) +
  xlab("year") +
  # gdp_percap is dollars per person, not total GDP in trillions of dollars.
  ylab("GDP per capita ($)") +
  theme_minimal(base_size = 12)

# Draw the scatterplot: one point per row of the data.
p1 + geom_point()
## Warning: Removed 766 rows containing missing values (geom_point).

The scatterplot appears to show a correlation.

# Restrict the x axis to 1990-2015 and the y axis to 0-100,000.
# NOTE(review): `p2` is not defined in the visible part of this document --
# confirm it is created elsewhere, or base p3 on `p1` instead.
p3 <- p2 +
  lims(x = c(1990, 2015), y = c(0, 10^5))

# Scatterplot within the restricted axis limits.
p3 + geom_point()
## Warning: Removed 781 rows containing missing values (geom_point).

Add a smoother in red with a confidence interval

# Scatterplot with a red smoother on top; the smoothing method is left to
# geom_smooth()'s default choice.
p4 <- p3 +
  geom_point() +
  geom_smooth(colour = "red")
p4
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 781 rows containing non-finite values (stat_smooth).
## Warning: Removed 781 rows containing missing values (geom_point).

Add a linear regression with confidence interval

# Scatterplot with a straight-line fit (linear model of y on x) on top.
p5 <- p3 +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x)
p5
## Warning: Removed 781 rows containing non-finite values (stat_smooth).
## Warning: Removed 781 rows containing missing values (geom_point).

Add a title, make the line thin and dot-dashed, and remove the confidence interval band

# Scatterplot with a thin dot-dash linear fit, no confidence band (se = FALSE),
# and a plot title.
p6 <- p3 +
  geom_point() +
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    linetype = "dotdash",
    size = 0.3
  ) +
  ggtitle("GDP VERSUS YEAR IN THE SELECTED COUNTRIES")
p6
## Warning: Removed 781 rows containing non-finite values (stat_smooth).
## Warning: Removed 781 rows containing missing values (geom_point).

The second chart

For the second chart, use dplyr to group_by() region and year, and then summarise the mutated GDP value with summarise(GDP = sum(GDP, na.rm = TRUE)). (There will be null values, or NAs, in this data, so you will need to use na.rm = TRUE.)

# Total GDP per region and year, in trillions of dollars. GDP is computed here
# from gdp_percap and population so this chunk does not rely on columns added
# earlier; na.rm = TRUE skips rows where GDP is NA, and .groups = "drop"
# returns an ungrouped result.
gdp_by_region <- nations %>%
  mutate(GDP = (gdp_percap * population) / 10^12) %>%
  group_by(region, year) %>%
  summarise(GDP = sum(GDP, na.rm = TRUE), .groups = "drop")

# Change the theme: themed axes only, no geom added yet.
ggplot(gdp_by_region, aes(x = year, y = GDP)) +
  xlab("year") +
  ylab("GDP ($ trillion)") +
  theme_minimal(base_size = 12)

# Area chart of regional GDP over time: one filled band per region, drawn with
# geom_area().
p21 <- ggplot(gdp_by_region, aes(x = year, y = GDP, fill = region)) +
  labs(
    title = "GDP VERSUS YEAR FOR EACH REGION",
    caption = "Source: The World Bank and Siadi"
  ) +
  xlab("year") +
  ylab("GDP ($ trillion)") +
  theme_minimal(base_size = 12)
p21 + geom_area()
## Warning: Removed 766 rows containing missing values (position_stack).