Load in libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.0.3
library(plotly)
## Warning: package 'plotly' was built under R version 4.0.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.0.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
library(RColorBrewer)
## Warning: package 'RColorBrewer' was built under R version 4.0.3
library(rworldmap)
## Warning: package 'rworldmap' was built under R version 4.0.3
## Loading required package: sp
## Warning: package 'sp' was built under R version 4.0.3
## ### Welcome to rworldmap ###
## For a short introduction type : vignette('rworldmap')
head(GHG)
## Country.Code Country.Name IncomeGroup GNI.Per.Capita..USD.
## 1 AFG Afghanistan Low income 550
## 2 ALB Albania Upper middle income 4860
## 3 DZA Algeria Upper middle income 4060
## 4 AGO Angola Lower middle income 3370
## 5 ARG Argentina Upper middle income 12370
## 6 ARM Armenia Upper middle income 4230
## GHG.Per.Capita change population
## 1 0.84 91.1 37172386
## 2 3.09 9.2 2866376
## 3 5.67 34.5 42228429
## 4 2.63 38.5 30809762
## 5 8.66 10.9 44494502
## 6 2.90 11.0 2951776
str(GHG)
## 'data.frame': 179 obs. of 7 variables:
## $ Country.Code : chr "AFG" "ALB" "DZA" "AGO" ...
## $ Country.Name : chr "Afghanistan" "Albania" "Algeria" "Angola" ...
## $ IncomeGroup : chr "Low income" "Upper middle income" "Upper middle income" "Lower middle income" ...
## $ GNI.Per.Capita..USD.: int 550 4860 4060 3370 12370 4230 23630 53190 49260 4050 ...
## $ GHG.Per.Capita : num 0.84 3.09 5.67 2.63 8.66 ...
## $ change : num 91.1 9.2 34.5 38.5 10.9 11 98.3 2.6 -12.3 21.3 ...
## $ population : int 37172386 2866376 42228429 30809762 44494502 2951776 105845 24992369 8847037 9942334 ...
summary(GHG)
## Country.Code Country.Name IncomeGroup GNI.Per.Capita..USD.
## Length:179 Length:179 Length:179 Min. : 280
## Class :character Class :character Class :character 1st Qu.: 1925
## Mode :character Mode :character Mode :character Median : 5470
## Mean :13985
## 3rd Qu.:17045
## Max. :83580
## GHG.Per.Capita change population
## Min. : 0.420 Min. :-34.30 Min. :1.032e+05
## 1st Qu.: 1.895 1st Qu.: -1.00 1st Qu.:2.828e+06
## Median : 4.000 Median : 20.40 Median :9.588e+06
## Mean : 6.879 Mean : 22.96 Mean :4.195e+07
## 3rd Qu.: 8.735 3rd Qu.: 42.50 3rd Qu.:3.029e+07
## Max. :59.000 Max. :136.20 Max. :1.393e+09
any(is.na(GHG))
## [1] FALSE
sum(is.na(GHG))
## [1] 0
colSums(is.na(GHG))
## Country.Code Country.Name IncomeGroup
## 0 0 0
## GNI.Per.Capita..USD. GHG.Per.Capita change
## 0 0 0
## population
## 0
ggplot(data=GHG, mapping = aes(x = IncomeGroup, y = GHG.Per.Capita)) +
geom_jitter() +
geom_boxplot(alpha = 0.7) +
theme_economist_white()+
labs ( y= "GreenHouse Gas Per Capita", x= "Income Group") +
ggtitle("Greenhouse Gas Per Capita Broken Down By Income Group")

We can see the higher the income group the more likely it is they have higher Greenhouse gas emissions per capita. Although, as we can see many of the outliers for the middle-income countries are above the mean of the high-income countries.
Let’s take a look at a world map of the distributions of Greenhouse Gases around the world to see how the countries compare.
But before starting we need to change GHG.Per.Capita Variable to a more suitable name becasue this variable will populate as the title of our world Map.
colnames(GHG)[colnames(GHG)=="GHG.Per.Capita"] = "Greenhouse_Gas_Per_Capita"
World_Map = joinCountryData2Map(GHG, nameJoinColumn = "Country.Name", joinCode = "NAME")
## 174 codes from your data successfully matched countries in the map
## 5 codes from your data failed to match with a country code in the map
## 69 codes from the map weren't represented in your data
mapCountryData(World_Map,
nameColumnToPlot = "Greenhouse_Gas_Per_Capita",
catMethod= "fixedwidth",
colourPalette = brewer.pal(10, "Spectral"),
numCats = 10)
## Warning in rwmGetClassBreaks(dataCategorised, catMethod = catMethod, numCats = numCats, : classification method should be set to one of :fixedWidth diverging quantiles pretty logFixedWidth categorical
## setting to fixedWidth as default

It is tough to tell the difference between a lot of the countries because we have some outliers in the High-income category. So I decide to make one without the worst GHG countries.
GHG_Under_30 = filter(GHG, GHG$Greenhouse_Gas_Per_Capita < 30)
World_Map_2 = joinCountryData2Map(GHG_Under_30, nameJoinColumn = "Country.Name", joinCode = "NAME")
## 170 codes from your data successfully matched countries in the map
## 5 codes from your data failed to match with a country code in the map
## 73 codes from the map weren't represented in your data
mapCountryData(World_Map_2,
nameColumnToPlot = "Greenhouse_Gas_Per_Capita",
catMethod= "fixedwidth",
colourPalette = brewer.pal(10, "Spectral"),
numCats = 10)
## Warning in rwmGetClassBreaks(dataCategorised, catMethod = catMethod, numCats = numCats, : classification method should be set to one of :fixedWidth diverging quantiles pretty logFixedWidth categorical
## setting to fixedWidth as default

As we can see, this map is a lot more clear than the other maps. The only countries we excluded were Qater, Curacao, Trinidad and Tobago, and Kuwait. All countries that were way too small anyway to see on the map. Now the country with the highest GHG per Capita is Austrialia.
plot_ly(data = GHG,
x=~population,
y=~Greenhouse_Gas_Per_Capita,
mode="markers",
color = GHG$IncomeGroup,
hoverinfo = "text",
text = paste("Population", GHG$Country.Name,
"<br>",
"Greenhouse Gas Per Capita", GHG$Greenhouse_Gas_Per_Capita)) %>%
layout(title = "Comparing Greenhouse Gas Emissions Per Capita To Popualtion",
yaxis=list(title="Greenhouse Gas Emissions Per Capita"),
xaxis=list(title="Population") )
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
Creating a df without the outliers. So beforehand it was tough to tell the difference between a lot of the points so I made another chart without a couple of countries that made the chart taught to read.
GHG_Under_30_Under_Pop = filter(GHG_Under_30, GHG_Under_30$population < 98423595)
plot_ly(data = GHG_Under_30_Under_Pop,
x=~population,
y=~Greenhouse_Gas_Per_Capita,
mode="markers",
color = GHG_Under_30_Under_Pop$IncomeGroup,
hoverinfo = "text",
text = paste("Population", GHG_Under_30_Under_Pop$Country.Name,
"<br>",
"Greenhouse Gas Per Capita", GHG_Under_30_Under_Pop$Greenhouse_Gas_Per_Capita)) %>%
layout(title = "Comparing Greenhouse Gas Emissions Per Capita To Popualtion",
yaxis=list(title="Greenhouse Gas Emissions Per Capita"),
xaxis=list(title="Population") )
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
Although this map is missing some countries, we refined our search to max it a lot easier to read.