# Check the results
str(nations) # compact structure of the nations dataset: dimensions, column types, first values
## spec_tbl_df [5,275 x 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ iso2c : chr [1:5275] "AD" "AD" "AD" "AD" ...
## $ iso3c : chr [1:5275] "AND" "AND" "AND" "AND" ...
## $ country : chr [1:5275] "Andorra" "Andorra" "Andorra" "Andorra" ...
## $ year : num [1:5275] 1996 1994 2003 1990 2009 ...
## $ gdp_percap : num [1:5275] NA NA NA NA NA NA NA NA NA NA ...
## $ population : num [1:5275] 64291 62707 74783 54511 85474 ...
## $ birth_rate : num [1:5275] 10.9 10.9 10.3 11.9 9.9 NA 10.9 9.8 11.8 11.2 ...
## $ neonat_mortal_rate: num [1:5275] 2.8 3.2 2 4.3 1.7 1.6 2 1.7 2.1 2.1 ...
## $ region : chr [1:5275] "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" ...
## $ income : chr [1:5275] "High income" "High income" "High income" "High income" ...
## - attr(*, "spec")=
## .. cols(
## .. iso2c = col_character(),
## .. iso3c = col_character(),
## .. country = col_character(),
## .. year = col_double(),
## .. gdp_percap = col_double(),
## .. population = col_double(),
## .. birth_rate = col_double(),
## .. neonat_mortal_rate = col_double(),
## .. region = col_character(),
## .. income = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
#typeof(nations) # storage type of the nations dataset - list
#attributes(nations) # attributes of the nations dataset
#tail(nations) # last six rows of the nations dataset
dim(nations) # number of rows and columns of the nations dataset
## [1] 5275 10
#nrow(nations) # number of rows of the nations dataset
#ncol(nations) # number of columns of the nations dataset
names(nations) # column names of the nations dataset
## [1] "iso2c" "iso3c" "country"
## [4] "year" "gdp_percap" "population"
## [7] "birth_rate" "neonat_mortal_rate" "region"
## [10] "income"
#sapply(nations, class) # the class of each column of nations dataset
# Keep only the rows where both per-capita GDP and year are present.
# Multiple conditions passed to filter() are combined with a logical AND.
nations_nona <- filter(
  nations,
  !is.na(gdp_percap),
  !is.na(year)
)
head(nations_nona)
## # A tibble: 6 x 10
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_r~
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AE ARE United Ar~ 1991 73037. 1913190 24.6 7.9
## 2 AE ARE United Ar~ 1993 71960. 2127863 22.4 7.3
## 3 AE ARE United Ar~ 2001 83534. 3217865 15.8 5.5
## 4 AE ARE United Ar~ 1992 73154. 2019014 23.5 7.6
## 5 AE ARE United Ar~ 1994 74684. 2238281 21.3 6.9
## 6 AE ARE United Ar~ 2007 75427. 6010100 12.8 4.7
## # ... with 2 more variables: region <chr>, income <chr>
# Check the result
#str(nations_nona)
#dim(nations_nona)
# Insert new column with mutate - sample code
#dataf <- dataf %>%
# mutate(DepressionIndex = mean(c_across(Depr1:Depr5))) %>%
# head()
# Figure out total GDP in trillions: per-capita GDP times population,
# scaled by 10^12.
# The pipeline must end at mutate(): a trailing pipe here would feed the
# result into the str() call below and assign str()'s return value to
# nations_nona instead of the tibble with the new GDP column.
nations_nona <- nations_nona %>%
  mutate(GDP = (gdp_percap * population) / 10^12)
# Check the result
str(nations_nona)
## spec_tbl_df [4,509 x 11] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ iso2c : chr [1:4509] "AE" "AE" "AE" "AE" ...
## $ iso3c : chr [1:4509] "ARE" "ARE" "ARE" "ARE" ...
## $ country : chr [1:4509] "United Arab Emirates" "United Arab Emirates" "United Arab Emirates" "United Arab Emirates" ...
## $ year : num [1:4509] 1991 1993 2001 1992 1994 ...
## $ gdp_percap : num [1:4509] 73037 71960 83534 73154 74684 ...
## $ population : num [1:4509] 1913190 2127863 3217865 2019014 2238281 ...
## $ birth_rate : num [1:4509] 24.6 22.4 15.8 23.5 21.3 ...
## $ neonat_mortal_rate: num [1:4509] 7.9 7.3 5.5 7.6 6.9 4.7 5.1 6.4 4.9 5.6 ...
## $ region : chr [1:4509] "Middle East & North Africa" "Middle East & North Africa" "Middle East & North Africa" "Middle East & North Africa" ...
## $ income : chr [1:4509] "High income" "High income" "High income" "High income" ...
## $ GDP : num [1:4509] 0.14 0.153 0.269 0.148 0.167 ...
## - attr(*, "spec")=
## .. cols(
## .. iso2c = col_character(),
## .. iso3c = col_character(),
## .. country = col_character(),
## .. year = col_double(),
## .. gdp_percap = col_double(),
## .. population = col_double(),
## .. birth_rate = col_double(),
## .. neonat_mortal_rate = col_double(),
## .. region = col_character(),
## .. income = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
#dim(nations_nona)
# Change the theme
# No geom layer is added here, so this only previews the axes, labels and
# theme. The y aesthetic is gdp_percap, so the axis is labelled as
# per-capita GDP rather than as total GDP in trillions.
ggplot(nations, aes(x = year, y = gdp_percap)) +
  # ggplot(nations_nona, aes(x = year, y = gdp_percap)) +
  xlab("year") +
  ylab("GDP per capita ($)") +
  theme_minimal(base_size = 12)
# Include all the related countries
# Base plot of per-capita GDP against year for every row of `nations`.
# The y axis label names the variable actually plotted (gdp_percap), not
# total GDP in trillions.
p1 <- ggplot(nations, aes(x = year, y = gdp_percap)) +
  # p1 <- ggplot(nations_nona, aes(x = year, y = gdp_percap)) +
  labs(
    title = "GDP VERSUS YEAR FOR EACH COUNTRY",
    caption = "Source: The World Bank and Siadi"
  ) +
  xlab("year") +
  ylab("GDP per capita ($)") +
  # filter(nations_nona, country == "China") +
  theme_minimal(base_size = 12)
# Rows with a missing gdp_percap are dropped by geom_point() with a warning.
p1 + geom_point()
## Warning: Removed 766 rows containing missing values (geom_point).
# Restrict the axes: years 1990-2015 on x, values 0 to 10^5 on y.
# NOTE(review): `p2` is not defined in the visible part of this script --
# confirm it is created earlier (it may have been intended to be `p1`).
p3 <- p2 +
  xlim(1990, 2015) +
  ylim(0, 10^5)
p3 + geom_point()
## Warning: Removed 781 rows containing missing values (geom_point).

# Scatter plot plus the default smoother, drawn in red.
p4 <- p3 +
  geom_point() +
  geom_smooth(color = "red")
p4
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 781 rows containing non-finite values (stat_smooth).
## Warning: Removed 781 rows containing missing values (geom_point).

# Scatter plot plus a straight-line (linear model) fit.
p5 <- p3 +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x)
p5
## Warning: Removed 781 rows containing non-finite values (stat_smooth).
## Warning: Removed 781 rows containing missing values (geom_point).

# Same linear fit without the confidence band, as a thin dot-dash line,
# with a new title.
p6 <- p3 +
  geom_point() +
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    linetype = "dotdash",
    size = 0.3
  ) +
  ggtitle("GDP VERSUS YEAR IN THE SELECTED COUNTRIES")
p6
## Warning: Removed 781 rows containing non-finite values (stat_smooth).
## Warning: Removed 781 rows containing missing values (geom_point).
### For the second chart: using dplyr, group_by region and year, then summarise the mutated GDP value with summarise(GDP = sum(gdp, na.rm = TRUE)). The data contain missing values (NAs), so na.rm = TRUE is required.
# Second chart: total GDP per region and year as a stacked area chart.
# GDP (in $ trillion) is derived from per-capita GDP and population, then
# summed within each region/year. na.rm = TRUE skips rows with a missing
# GDP so that one missing country does not turn a regional total into NA.
gdp_by_region <- nations %>%
  mutate(GDP = (gdp_percap * population) / 10^12) %>%
  group_by(region, year) %>%
  summarise(GDP = sum(GDP, na.rm = TRUE)) %>%
  ungroup()

# Change the theme
# No geom layer is added here, so this only previews the axes, labels and
# theme for the aggregated data.
ggplot(gdp_by_region, aes(x = year, y = GDP)) +
  xlab("year") +
  ylab("GDP ($ trillion)") +
  theme_minimal(base_size = 12)

# One filled band per region, generated by geom_area()
p21 <- ggplot(gdp_by_region, aes(x = year, y = GDP, fill = region)) +
  labs(
    title = "GDP VERSUS YEAR FOR EACH REGION",
    caption = "Source: The World Bank and Siadi"
  ) +
  xlab("year") +
  ylab("GDP ($ trillion)") +
  theme_minimal(base_size = 12)
p21 + geom_area()