To tidy the dataset load tidyverse
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.6 v dplyr 1.0.4
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Load the dataset
coal_data <- read_csv('http://594442.youcanlearnit.net/coal.csv')
## Warning: Missing column names filled in: 'X2' [2], 'X3' [3], 'X4' [4], 'X5' [5],
## 'X6' [6], 'X7' [7], 'X8' [8], 'X9' [9], 'X10' [10], 'X11' [11], 'X12' [12],
## 'X13' [13], 'X14' [14], 'X15' [15], 'X16' [16], 'X17' [17], 'X18' [18],
## 'X19' [19], 'X20' [20], 'X21' [21], 'X22' [22], 'X23' [23], 'X24' [24],
## 'X25' [25], 'X26' [26], 'X27' [27], 'X28' [28], 'X29' [29], 'X30' [30],
## 'X31' [31]
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_character()
## )
## i Use `spec()` for the full column specifications.
The glimpse shows row number two is empty. Hence, I will reload the dataset to ignore the empty row. I will now overwrite the dataset
coal_data <- read_csv('http://594442.youcanlearnit.net/coal.csv', skip=2)
## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_character()
## )
## i Use `spec()` for the full column specifications.
Now that the row number two is removed, I will take a glimpse of the data
glimpse(coal_data)
## Rows: 232
## Columns: 31
## $ X1 <chr> "North America", "Bermuda", "Canada", "Greenland", "Mexico",...
## $ `1980` <chr> "16.45179", "0", "0.96156", "0.00005", "0.10239", "0", "15.3...
## $ `1981` <chr> "16.98772", "0", "0.99047", "0.00005", "0.10562", "0", "15.8...
## $ `1982` <chr> "16.47546", "0", "1.05584", "0.00003", "0.11967", "0", "15.2...
## $ `1983` <chr> "17.12407", "0", "1.11653", "0.00003", "0.12869", "0", "15.8...
## $ `1984` <chr> "18.4267", "0", "1.23682", "0.00003", "0.13071", "0", "17.05...
## $ `1985` <chr> "18.81819", "0", "1.20679", "0", "0.14646", "0", "17.46494",...
## $ `1986` <chr> "18.52559", "0", "1.12583", "0", "0.15609", "0", "17.24367",...
## $ `1987` <chr> "19.43781", "0", "1.25072", "0", "0.17001", "0", "18.01708",...
## $ `1988` <chr> "20.40363", "0", "1.35809", "0", "0.15967", "0", "18.88587",...
## $ `1989` <chr> "20.62571", "0", "1.35196", "0", "0.17359", "0", "19.10017",...
## $ `1990` <chr> "20.5602", "0", "1.21338", "0", "0.1694", "0", "19.17742", "...
## $ `1991` <chr> "20.4251", "0", "1.26457", "0", "0.15916", "0", "19.00137", ...
## $ `1992` <chr> "20.64672", "0", "1.32379", "0", "0.16584", "0", "19.15709",...
## $ `1993` <chr> "21.28219", "0", "1.22875", "0", "0.19118", "0", "19.86225",...
## $ `1994` <chr> "21.39631", "0", "1.24492", "0", "0.1836", "0", "19.96779", ...
## $ `1995` <chr> "21.64225", "0", "1.28479", "0", "0.20768", "0", "20.14978",...
## $ `1996` <chr> "22.57572", "0", "1.30032", "0", "0.25067", "0", "21.02473",...
## $ `1997` <chr> "23.20491", "0", "1.44933", "0", "0.26373", "0", "21.49186",...
## $ `1998` <chr> "23.5002", "0", "1.50985", "0", "0.26753", "0", "21.72283", ...
## $ `1999` <chr> "23.4747", "0", "1.505", "0", "0.28947", "0", "21.68023", "0...
## $ `2000` <chr> "24.55583", "0", "1.61651", "0", "0.29444", "0", "22.64488",...
## $ `2001` <chr> "23.62705", "0", "1.35444", "0", "0.32908", "0", "21.94353",...
## $ `2002` <chr> "23.69876", "0", "1.36876", "0", "0.36525", "0", "21.96475",...
## $ `2003` <chr> "24.17788", "0", "1.38766", "0", "0.41878", "0", "22.37144",...
## $ `2004` <chr> "24.36024", "0", "1.43684", "0", "0.31944", "0", "22.60396",...
## $ `2005` <chr> "24.6876", "0", "1.44948", "0", "0.39739", "0", "22.84073", ...
## $ `2006` <chr> "24.32174", "0", "1.42135", "0", "0.39244", "0", "22.50795",...
## $ `2007` <chr> "24.54746", "0", "1.38369", "0", "0.38911", "0", "22.77466",...
## $ `2008` <chr> "24.11993", "0", "1.37388", "0", "0.32008", "0", "22.42597",...
## $ `2009` <chr> "21.14803", "0", "1.14314", "0", "0.3365", "0", "19.66839", ...
I will rename the column name to Region from X1 so that when we convert the dataset to long, all regions fall under column 1
colnames (coal_data)[1] <- ('Region')
summarize the data
summary (coal_data)
## Region 1980 1981 1982
## Length:232 Length:232 Length:232 Length:232
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## 1983 1984 1985 1986
## Length:232 Length:232 Length:232 Length:232
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## 1987 1988 1989 1990
## Length:232 Length:232 Length:232 Length:232
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## 1991 1992 1993 1994
## Length:232 Length:232 Length:232 Length:232
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## 1995 1996 1997 1998
## Length:232 Length:232 Length:232 Length:232
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## 1999 2000 2001 2002
## Length:232 Length:232 Length:232 Length:232
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## 2003 2004 2005 2006
## Length:232 Length:232 Length:232 Length:232
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## 2007 2008 2009
## Length:232 Length:232 Length:232
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
Data can be seen stored in characters. I will now see the data set
coal_data
## # A tibble: 232 x 31
## Region `1980` `1981` `1982` `1983` `1984` `1985` `1986` `1987` `1988` `1989`
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 North~ 16.45~ 16.98~ 16.47~ 17.12~ 18.42~ 18.81~ 18.52~ 19.43~ 20.40~ 20.62~
## 2 Bermu~ 0 0 0 0 0 0 0 0 0 0
## 3 Canada 0.961~ 0.990~ 1.055~ 1.116~ 1.236~ 1.206~ 1.125~ 1.250~ 1.358~ 1.351~
## 4 Green~ 0.000~ 0.000~ 0.000~ 0.000~ 0.000~ 0 0 0 0 0
## 5 Mexico 0.102~ 0.105~ 0.119~ 0.128~ 0.130~ 0.146~ 0.156~ 0.170~ 0.159~ 0.173~
## 6 Saint~ 0 0 0 0 0 0 0 0 0 0
## 7 Unite~ 15.38~ 15.89~ 15.29~ 15.87~ 17.05~ 17.46~ 17.24~ 18.01~ 18.88~ 19.10~
## 8 Centr~ 0.420~ 0.413~ 0.4154 0.466~ 0.524~ 0.591~ 0.596~ 0.615~ 0.669~ 0.6737
## 9 Antar~ 0 0 0 0 0 0 0 0 0 0
## 10 Antig~ 0 0 0 0 0 0 0 0 0 0
## # ... with 222 more rows, and 20 more variables: `1990` <chr>, `1991` <chr>,
## # `1992` <chr>, `1993` <chr>, `1994` <chr>, `1995` <chr>, `1996` <chr>,
## # `1997` <chr>, `1998` <chr>, `1999` <chr>, `2000` <chr>, `2001` <chr>,
## # `2002` <chr>, `2003` <chr>, `2004` <chr>, `2005` <chr>, `2006` <chr>,
## # `2007` <chr>, `2008` <chr>, `2009` <chr>
The dataset is wide hence to make it easier to interpret I can convert it into a long dataset. Using the gather function, I will make a new tibble for the new dataset
coal_datalong <- gather(coal_data,'year','coal_consumption', -Region)
Now that dataset is made longer it shall be easier to read. To see the dataset wi will take a glimpse of the data
glimpse(coal_datalong)
## Rows: 6,960
## Columns: 3
## $ Region <chr> "North America", "Bermuda", "Canada", "Greenland",...
## $ year <chr> "1980", "1980", "1980", "1980", "1980", "1980", "1...
## $ coal_consumption <chr> "16.45179", "0", "0.96156", "0.00005", "0.10239", ...
Change the character string to an integer for the year column
coal_datalong$year <- as.integer(coal_datalong$year)
coal_datalong$coal_consumption <- as.numeric(coal_datalong$coal_consumption)
## Warning: NAs introduced by coercion
summary(coal_datalong)
## Region year coal_consumption
## Length:6960 Min. :1980 Min. : -0.0003
## Class :character 1st Qu.:1987 1st Qu.: 0.0000
## Mode :character Median :1994 Median : 0.0002
## Mean :1994 Mean : 1.3256
## 3rd Qu.:2002 3rd Qu.: 0.0773
## Max. :2009 Max. :138.8298
## NA's :517
To avoid duplicates, I will run unique function
unique(coal_datalong$Region)
## [1] "North America" "Bermuda"
## [3] "Canada" "Greenland"
## [5] "Mexico" "Saint Pierre and Miquelon"
## [7] "United States" "Central & South America"
## [9] "Antarctica" "Antigua and Barbuda"
## [11] "Argentina" "Aruba"
## [13] "Bahamas, The" "Barbados"
## [15] "Belize" "Bolivia"
## [17] "Brazil" "Cayman Islands"
## [19] "Chile" "Colombia"
## [21] "Costa Rica" "Cuba"
## [23] "Dominica" "Dominican Republic"
## [25] "Ecuador" "El Salvador"
## [27] "Falkland Islands (Islas Malvinas)" "French Guiana"
## [29] "Grenada" "Guadeloupe"
## [31] "Guatemala" "Guyana"
## [33] "Haiti" "Honduras"
## [35] "Jamaica" "Martinique"
## [37] "Montserrat" "Netherlands Antilles"
## [39] "Nicaragua" "Panama"
## [41] "Paraguay" "Peru"
## [43] "Puerto Rico" "Saint Kitts and Nevis"
## [45] "Saint Lucia" "Saint Vincent/Grenadines"
## [47] "Suriname" "Trinidad and Tobago"
## [49] "Turks and Caicos Islands" "Uruguay"
## [51] "Venezuela" "Virgin Islands, U.S."
## [53] "Virgin Islands, British" "Europe"
## [55] "Albania" "Austria"
## [57] "Belgium" "Bosnia and Herzegovina"
## [59] "Bulgaria" "Croatia"
## [61] "Cyprus" "Czech Republic"
## [63] "Denmark" "Faroe Islands"
## [65] "Finland" "Former Czechoslovakia"
## [67] "Former Serbia and Montenegro" "Former Yugoslavia"
## [69] "France" "Germany"
## [71] "Germany, East" "Germany, West"
## [73] "Gibraltar" "Greece"
## [75] "Hungary" "Iceland"
## [77] "Ireland" "Italy"
## [79] "Luxembourg" "Macedonia"
## [81] "Malta" "Montenegro"
## [83] "Netherlands" "Norway"
## [85] "Poland" "Portugal"
## [87] "Romania" "Serbia"
## [89] "Slovakia" "Slovenia"
## [91] "Spain" "Sweden"
## [93] "Switzerland" "Turkey"
## [95] "United Kingdom" "Eurasia"
## [97] "Armenia" "Azerbaijan"
## [99] "Belarus" "Estonia"
## [101] "Former U.S.S.R." "Georgia"
## [103] "Kazakhstan" "Kyrgyzstan"
## [105] "Latvia" "Lithuania"
## [107] "Moldova" "Russia"
## [109] "Tajikistan" "Turkmenistan"
## [111] "Ukraine" "Uzbekistan"
## [113] "Middle East" "Bahrain"
## [115] "Iran" "Iraq"
## [117] "Israel" "Jordan"
## [119] "Kuwait" "Lebanon"
## [121] "Oman" "Palestine"
## [123] "Qatar" "Saudi Arabia"
## [125] "Syria" "United Arab Emirates"
## [127] "Yemen" "Africa"
## [129] "Algeria" "Angola"
## [131] "Benin" "Botswana"
## [133] "Burkina Faso" "Burundi"
## [135] "Cameroon" "Cape Verde"
## [137] "Central African Republic" "Chad"
## [139] "Comoros" "Congo (Brazzaville)"
## [141] "Congo (Kinshasa)" "Cote dIvoire (IvoryCoast)"
## [143] "Djibouti" "Egypt"
## [145] "Equatorial Guinea" "Eritrea"
## [147] "Ethiopia" "Gabon"
## [149] "Gambia, The" "Ghana"
## [151] "Guinea" "Guinea-Bissau"
## [153] "Kenya" "Lesotho"
## [155] "Liberia" "Libya"
## [157] "Madagascar" "Malawi"
## [159] "Mali" "Mauritania"
## [161] "Mauritius" "Morocco"
## [163] "Mozambique" "Namibia"
## [165] "Niger" "Nigeria"
## [167] "Reunion" "Rwanda"
## [169] "Saint Helena" "Sao Tome and Principe"
## [171] "Senegal" "Seychelles"
## [173] "Sierra Leone" "Somalia"
## [175] "South Africa" "Sudan"
## [177] "Swaziland" "Tanzania"
## [179] "Togo" "Tunisia"
## [181] "Uganda" "Western Sahara"
## [183] "Zambia" "Zimbabwe"
## [185] "Asia & Oceania" "Afghanistan"
## [187] "American Samoa" "Australia"
## [189] "Bangladesh" "Bhutan"
## [191] "Brunei" "Burma (Myanmar)"
## [193] "Cambodia" "China"
## [195] "Cook Islands" "Fiji"
## [197] "French Polynesia" "Guam"
## [199] "Hawaiian Trade Zone" "Hong Kong"
## [201] "India" "Indonesia"
## [203] "Japan" "Kiribati"
## [205] "Korea, North" "Korea, South"
## [207] "Laos" "Macau"
## [209] "Malaysia" "Maldives"
## [211] "Mongolia" "Nauru"
## [213] "Nepal" "New Caledonia"
## [215] "New Zealand" "Niue"
## [217] "Pakistan" "Papua New Guinea"
## [219] "Philippines" "Samoa"
## [221] "Singapore" "Solomon Islands"
## [223] "Sri Lanka" "Taiwan"
## [225] "Thailand" "Timor-Leste (East Timor)"
## [227] "Tonga" "U.S. Pacific Islands"
## [229] "Vanuatu" "Vietnam"
## [231] "Wake Island" "World"
I can observe there are regional totals and then the consumption data is recorded as a country. I will create a new tibble to include global and region wise coal consumption
Continent <- c("North America","Antarctica","Europe","Central & South America",
"Eurasia","Middle East","Africa","Asia & Oceania", "World")
Lets find the continents in the dataset
locationbaseddata <- which(!is.na(match(coal_datalong$Region, Continent)))
Seperate countries and continents
Coal_Country <- coal_datalong[-locationbaseddata,]
Coal_Continent <- coal_datalong[locationbaseddata,]
unique(Coal_Country$Region)
## [1] "Bermuda" "Canada"
## [3] "Greenland" "Mexico"
## [5] "Saint Pierre and Miquelon" "United States"
## [7] "Antigua and Barbuda" "Argentina"
## [9] "Aruba" "Bahamas, The"
## [11] "Barbados" "Belize"
## [13] "Bolivia" "Brazil"
## [15] "Cayman Islands" "Chile"
## [17] "Colombia" "Costa Rica"
## [19] "Cuba" "Dominica"
## [21] "Dominican Republic" "Ecuador"
## [23] "El Salvador" "Falkland Islands (Islas Malvinas)"
## [25] "French Guiana" "Grenada"
## [27] "Guadeloupe" "Guatemala"
## [29] "Guyana" "Haiti"
## [31] "Honduras" "Jamaica"
## [33] "Martinique" "Montserrat"
## [35] "Netherlands Antilles" "Nicaragua"
## [37] "Panama" "Paraguay"
## [39] "Peru" "Puerto Rico"
## [41] "Saint Kitts and Nevis" "Saint Lucia"
## [43] "Saint Vincent/Grenadines" "Suriname"
## [45] "Trinidad and Tobago" "Turks and Caicos Islands"
## [47] "Uruguay" "Venezuela"
## [49] "Virgin Islands, U.S." "Virgin Islands, British"
## [51] "Albania" "Austria"
## [53] "Belgium" "Bosnia and Herzegovina"
## [55] "Bulgaria" "Croatia"
## [57] "Cyprus" "Czech Republic"
## [59] "Denmark" "Faroe Islands"
## [61] "Finland" "Former Czechoslovakia"
## [63] "Former Serbia and Montenegro" "Former Yugoslavia"
## [65] "France" "Germany"
## [67] "Germany, East" "Germany, West"
## [69] "Gibraltar" "Greece"
## [71] "Hungary" "Iceland"
## [73] "Ireland" "Italy"
## [75] "Luxembourg" "Macedonia"
## [77] "Malta" "Montenegro"
## [79] "Netherlands" "Norway"
## [81] "Poland" "Portugal"
## [83] "Romania" "Serbia"
## [85] "Slovakia" "Slovenia"
## [87] "Spain" "Sweden"
## [89] "Switzerland" "Turkey"
## [91] "United Kingdom" "Armenia"
## [93] "Azerbaijan" "Belarus"
## [95] "Estonia" "Former U.S.S.R."
## [97] "Georgia" "Kazakhstan"
## [99] "Kyrgyzstan" "Latvia"
## [101] "Lithuania" "Moldova"
## [103] "Russia" "Tajikistan"
## [105] "Turkmenistan" "Ukraine"
## [107] "Uzbekistan" "Bahrain"
## [109] "Iran" "Iraq"
## [111] "Israel" "Jordan"
## [113] "Kuwait" "Lebanon"
## [115] "Oman" "Palestine"
## [117] "Qatar" "Saudi Arabia"
## [119] "Syria" "United Arab Emirates"
## [121] "Yemen" "Algeria"
## [123] "Angola" "Benin"
## [125] "Botswana" "Burkina Faso"
## [127] "Burundi" "Cameroon"
## [129] "Cape Verde" "Central African Republic"
## [131] "Chad" "Comoros"
## [133] "Congo (Brazzaville)" "Congo (Kinshasa)"
## [135] "Cote dIvoire (IvoryCoast)" "Djibouti"
## [137] "Egypt" "Equatorial Guinea"
## [139] "Eritrea" "Ethiopia"
## [141] "Gabon" "Gambia, The"
## [143] "Ghana" "Guinea"
## [145] "Guinea-Bissau" "Kenya"
## [147] "Lesotho" "Liberia"
## [149] "Libya" "Madagascar"
## [151] "Malawi" "Mali"
## [153] "Mauritania" "Mauritius"
## [155] "Morocco" "Mozambique"
## [157] "Namibia" "Niger"
## [159] "Nigeria" "Reunion"
## [161] "Rwanda" "Saint Helena"
## [163] "Sao Tome and Principe" "Senegal"
## [165] "Seychelles" "Sierra Leone"
## [167] "Somalia" "South Africa"
## [169] "Sudan" "Swaziland"
## [171] "Tanzania" "Togo"
## [173] "Tunisia" "Uganda"
## [175] "Western Sahara" "Zambia"
## [177] "Zimbabwe" "Afghanistan"
## [179] "American Samoa" "Australia"
## [181] "Bangladesh" "Bhutan"
## [183] "Brunei" "Burma (Myanmar)"
## [185] "Cambodia" "China"
## [187] "Cook Islands" "Fiji"
## [189] "French Polynesia" "Guam"
## [191] "Hawaiian Trade Zone" "Hong Kong"
## [193] "India" "Indonesia"
## [195] "Japan" "Kiribati"
## [197] "Korea, North" "Korea, South"
## [199] "Laos" "Macau"
## [201] "Malaysia" "Maldives"
## [203] "Mongolia" "Nauru"
## [205] "Nepal" "New Caledonia"
## [207] "New Zealand" "Niue"
## [209] "Pakistan" "Papua New Guinea"
## [211] "Philippines" "Samoa"
## [213] "Singapore" "Solomon Islands"
## [215] "Sri Lanka" "Taiwan"
## [217] "Thailand" "Timor-Leste (East Timor)"
## [219] "Tonga" "U.S. Pacific Islands"
## [221] "Vanuatu" "Vietnam"
## [223] "Wake Island"
unique(Coal_Continent$Region)
## [1] "North America" "Central & South America"
## [3] "Antarctica" "Europe"
## [5] "Eurasia" "Middle East"
## [7] "Africa" "Asia & Oceania"
## [9] "World"
Visualize the coal consumption data of continent totals
library(ggplot2)
ggplot(data=Coal_Continent, mapping=aes(x=year, y=coal_consumption))+
geom_line(mapping=aes(color = Region))