The data source is available at: https://openei.org/doe-opendata/dataset/21d53271-a3fb-49bf-a3cd-0e66af954f78/resource/4cae93e5-d067-47bc-9150-1de90f04c275/download/totalcoalconsumption19802009quadrillionbtu.csv
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
# Location of the source CSV on OpenEI; going by the file name it holds total
# coal consumption for 1980-2009 in quadrillion Btu.
data_path <- "https://openei.org/doe-opendata/dataset/21d53271-a3fb-49bf-a3cd-0e66af954f78/resource/4cae93e5-d067-47bc-9150-1de90f04c275/download/totalcoalconsumption19802009quadrillionbtu.csv"
# Read the remote file with read.csv() defaults, then preview the column types.
df <- read.csv(file = data_path)
glimpse(df)
## Observations: 232
## Variables: 31
## $ X <fctr> North America, Bermuda, Canada, Greenland, Mexico, Sain...
## $ X1980 <fctr> 16.45179, 0, 0.96156, 0.00005, 0.10239, 0, 15.38779, 0....
## $ X1981 <fctr> 16.98772, 0, 0.99047, 0.00005, 0.10562, 0, 15.89158, 0....
## $ X1982 <fctr> 16.47546, 0, 1.05584, 0.00003, 0.11967, 0, 15.29993, 0....
## $ X1983 <fctr> 17.12407, 0, 1.11653, 0.00003, 0.12869, 0, 15.87882, 0....
## $ X1984 <fctr> 18.4267, 0, 1.23682, 0.00003, 0.13071, 0, 17.05914, 0.5...
## $ X1985 <fctr> 18.81819, 0, 1.20679, 0, 0.14646, 0, 17.46494, 0.59161,...
## $ X1986 <fctr> 18.52559, 0, 1.12583, 0, 0.15609, 0, 17.24367, 0.59652,...
## $ X1987 <fctr> 19.43781, 0, 1.25072, 0, 0.17001, 0, 18.01708, 0.61574,...
## $ X1988 <fctr> 20.40363, 0, 1.35809, 0, 0.15967, 0, 18.88587, 0.66937,...
## $ X1989 <fctr> 20.62571, 0, 1.35196, 0, 0.17359, 0, 19.10017, 0.6737, ...
## $ X1990 <fctr> 20.5602, 0, 1.21338, 0, 0.1694, 0, 19.17742, 0.57386, 0...
## $ X1991 <fctr> 20.4251, 0, 1.26457, 0, 0.15916, 0, 19.00137, 0.67426, ...
## $ X1992 <fctr> 20.64672, 0, 1.32379, 0, 0.16584, 0, 19.15709, 0.64595,...
## $ X1993 <fctr> 21.28219, 0, 1.22875, 0, 0.19118, 0, 19.86225, 0.66307,...
## $ X1994 <fctr> 21.39631, 0, 1.24492, 0, 0.1836, 0, 19.96779, 0.66227, ...
## $ X1995 <fctr> 21.64225, 0, 1.28479, 0, 0.20768, 0, 20.14978, 0.6533, ...
## $ X1996 <fctr> 22.57572, 0, 1.30032, 0, 0.25067, 0, 21.02473, 0.7177, ...
## $ X1997 <fctr> 23.20491, 0, 1.44933, 0, 0.26373, 0, 21.49186, 0.80408,...
## $ X1998 <fctr> 23.5002, 0, 1.50985, 0, 0.26753, 0, 21.72283, 0.82202, ...
## $ X1999 <fctr> 23.4747, 0, 1.505, 0, 0.28947, 0, 21.68023, 0.77054, 0,...
## $ X2000 <fctr> 24.55583, 0, 1.61651, 0, 0.29444, 0, 22.64488, 0.75904,...
## $ X2001 <fctr> 23.62705, 0, 1.35444, 0, 0.32908, 0, 21.94353, 0.69699,...
## $ X2002 <fctr> 23.69876, 0, 1.36876, 0, 0.36525, 0, 21.96475, 0.71689,...
## $ X2003 <fctr> 24.17788, 0, 1.38766, 0, 0.41878, 0, 22.37144, 0.77548,...
## $ X2004 <fctr> 24.36024, 0, 1.43684, 0, 0.31944, 0, 22.60396, 0.8271, ...
## $ X2005 <fctr> 24.6876, 0, 1.44948, 0, 0.39739, 0, 22.84073, 0.83557, ...
## $ X2006 <fctr> 24.32174, 0, 1.42135, 0, 0.39244, 0, 22.50795, 0.87547,...
## $ X2007 <fctr> 24.54746, 0, 1.38369, 0, 0.38911, 0, 22.77466, 0.91489,...
## $ X2008 <fctr> 24.11993, 0, 1.37388, 0, 0.32008, 0, 22.42597, 1.0013, ...
## $ X2009 <fctr> 21.14803, 0, 1.14314, 0, 0.3365, 0, 19.66839, 0.91702, ...
Rename the first column (read in as X) to region.
# Column 1 holds the region/country labels; give it a descriptive name, then
# summarise every column of the wide table.
names(df)[1] <- "region"
summary(df)
## region X1980 X1981 X1982 X1983
## Afghanistan : 1 0 :93 0 :91 0 :92 0 :92
## Africa : 1 -- :30 -- :30 -- :30 -- :30
## Albania : 1 0.00003:10 0.00003 :12 0.00003:11 0.00003: 9
## Algeria : 1 0.00005: 6 0.00005 : 5 0.00008: 3 0.00005: 2
## American Samoa: 1 0.00011: 2 0.00127 : 2 0.00005: 2 0.00008: 2
## Angola : 1 0.00012: 2 11.49459: 2 0.0067 : 2 0.00019: 2
## (Other) :226 (Other):89 (Other) :90 (Other):92 (Other):95
## X1984 X1985 X1986 X1987 X1988
## 0 :93 0 :95 0 :95 0 :94 0 :94
## -- :30 -- :30 -- :29 -- :30 -- :30
## 0.00003: 7 0.00003: 5 0.00003 : 7 0.00003 : 6 0.00003 : 6
## 0.00005: 2 0.00005: 3 0.00005 : 3 0.00002 : 2 0.00002 : 2
## 0.00019: 2 0.00002: 2 0.00029 : 2 0.00052 : 2 0.00335 : 2
## 0.00024: 2 0.00011: 2 12.42751: 2 12.46216: 2 12.65876: 2
## (Other):96 (Other):95 (Other) :94 (Other) :96 (Other) :96
## X1989 X1990 X1991 X1992 X1993
## 0 :94 0 : 93 0 : 90 0 : 94 0 : 93
## -- :30 -- : 29 -- : 30 -- : 12 -- : 11
## 0.00003 : 4 0.00003: 3 0.00002: 5 0.00002: 2 0.00003: 3
## 0.00029 : 3 0.00006: 3 0.00001: 2 0.00003: 2 0.00006: 3
## 0.00002 : 2 0.00002: 2 0.00003: 2 0.00007: 2 0.00002: 2
## 11.99361: 2 0.00034: 2 0.00041: 2 0.00164: 2 0.00008: 1
## (Other) :97 (Other):100 (Other):101 (Other):118 (Other):119
## X1994 X1995 X1996 X1997 X1998
## 0 : 94 0 : 93 0 : 95 0 : 95 0 : 97
## -- : 10 -- : 10 -- : 10 -- : 9 -- : 9
## 0.00003: 3 0.00003: 4 0.00003: 4 0.00003: 6 0.00003 : 9
## 0.00002: 2 0.00156: 3 0.00008: 3 0.00008: 2 -0.00005: 1
## 0.00006: 2 0.00002: 2 0.00002: 2 0.00001: 1 0.00001 : 1
## 0.00016: 2 0.00005: 2 0.00006: 1 0.00002: 1 0.00008 : 1
## (Other):119 (Other):118 (Other):117 (Other):118 (Other) :114
## X1999 X2000 X2001 X2002 X2003
## 0 :100 0 : 99 0 :100 0 :101 0 :103
## -- : 9 -- : 9 -- : 9 -- : 9 -- : 8
## 0.00003: 4 0.00002: 2 0.00003 : 3 0.00003: 2 0.00036: 2
## 0.00184: 2 0.00003: 2 0.00167 : 2 0.00054: 2 0.00003: 1
## 0.00192: 2 0.00169: 2 -0.00025: 1 0.00006: 1 0.00004: 1
## 0.00001: 1 0.00001: 1 0.00001 : 1 0.00008: 1 0.00005: 1
## (Other):114 (Other):117 (Other) :116 (Other):116 (Other):116
## X2004 X2005 X2006 X2007 X2008
## 0 :101 0 :101 0 :101 0 :101 0 :102
## -- : 8 -- : 8 -- : 7 -- : 7 -- : 7
## 0.00026 : 2 0.00003: 2 0.00003: 2 0.00551: 2 0.00551: 2
## -0.00003: 1 0.00192: 2 0.00001: 1 0.00001: 1 0.00001: 1
## 0.00002 : 1 0.00001: 1 0.00008: 1 0.00004: 1 0.00002: 1
## 0.00003 : 1 0.00005: 1 0.00011: 1 0.0001 : 1 0.00006: 1
## (Other) :118 (Other):117 (Other):119 (Other):119 (Other):118
## X2009
## 0 :100
## -- : 7
## 0.00001: 2
## 0.00243: 2
## 0.00551: 2
## 0.00002: 1
## (Other):118
# Reshape from wide (one X<year> column per year) to long format: one row per
# region-year pair, with the value stored in `coal_consumption`.
# NOTE: gather() is superseded by pivot_longer() in current tidyr; it is left
# in place here.
df_new <- gather(df, key = "year", value = "coal_consumption", -region)
## Warning: attributes are not identical across measure variables;
## they will be dropped
# Inspect the reshaped data two ways.
glimpse(df_new)
## Observations: 6,960
## Variables: 3
## $ region <fctr> North America, Bermuda, Canada, Greenland, M...
## $ year <chr> "X1980", "X1980", "X1980", "X1980", "X1980", ...
## $ coal_consumption <chr> "16.45179", "0", "0.96156", "0.00005", "0.102...
str(df_new)
## 'data.frame': 6960 obs. of 3 variables:
## $ region : Factor w/ 232 levels "Afghanistan",..: 158 24 37 89 139 179 219 40 7 8 ...
## $ year : chr "X1980" "X1980" "X1980" "X1980" ...
## $ coal_consumption: chr "16.45179" "0" "0.96156" "0.00005" ...
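Remove the leading "X" from the year and convert coal_consumption from character to numeric. Entries that are not numbers (such as the "--" placeholder seen in the summary above) become NA, which is what triggers the coercion warning.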
# Parse the character values as numbers. Entries that are not numeric (for
# example the "--" placeholder visible in summary(df)) become NA, hence the
# warning recorded below.
df_new[["coal_consumption"]] <- as.numeric(df_new[["coal_consumption"]])
## Warning: NAs introduced by coercion
glimpse(df_new)
## Observations: 6,960
## Variables: 3
## $ region <fctr> North America, Bermuda, Canada, Greenland, M...
## $ year <chr> "X1980", "X1980", "X1980", "X1980", "X1980", ...
## $ coal_consumption <dbl> 16.45179, 0.00000, 0.96156, 0.00005, 0.10239,...
# The year values still carry the "X" prefix from the original column names
# ("X1980" ... "X2009"). Strip only that single leading "X" -- the pattern is
# anchored so no other character in the value can be removed -- and store the
# year as a number.
df_new$year <- as.numeric(sub("^X", "", df_new$year))
glimpse(df_new)
## Observations: 6,960
## Variables: 3
## $ region <fctr> North America, Bermuda, Canada, Greenland, M...
## $ year <dbl> 1980, 1980, 1980, 1980, 1980, 1980, 1980, 198...
## $ coal_consumption <dbl> 16.45179, 0.00000, 0.96156, 0.00005, 0.10239,...
We can see that the region column contains both countries and continents, so we need to separate the continents from the countries.
# List every distinct label in `region` to spot the aggregate (non-country)
# entries.
unique(df_new[["region"]])
## [1] North America Bermuda
## [3] Canada Greenland
## [5] Mexico Saint Pierre and Miquelon
## [7] United States Central & South America
## [9] Antarctica Antigua and Barbuda
## [11] Argentina Aruba
## [13] Bahamas, The Barbados
## [15] Belize Bolivia
## [17] Brazil Cayman Islands
## [19] Chile Colombia
## [21] Costa Rica Cuba
## [23] Dominica Dominican Republic
## [25] Ecuador El Salvador
## [27] Falkland Islands (Islas Malvinas) French Guiana
## [29] Grenada Guadeloupe
## [31] Guatemala Guyana
## [33] Haiti Honduras
## [35] Jamaica Martinique
## [37] Montserrat Netherlands Antilles
## [39] Nicaragua Panama
## [41] Paraguay Peru
## [43] Puerto Rico Saint Kitts and Nevis
## [45] Saint Lucia Saint Vincent/Grenadines
## [47] Suriname Trinidad and Tobago
## [49] Turks and Caicos Islands Uruguay
## [51] Venezuela Virgin Islands, U.S.
## [53] Virgin Islands, British Europe
## [55] Albania Austria
## [57] Belgium Bosnia and Herzegovina
## [59] Bulgaria Croatia
## [61] Cyprus Czech Republic
## [63] Denmark Faroe Islands
## [65] Finland Former Czechoslovakia
## [67] Former Serbia and Montenegro Former Yugoslavia
## [69] France Germany
## [71] Germany, East Germany, West
## [73] Gibraltar Greece
## [75] Hungary Iceland
## [77] Ireland Italy
## [79] Luxembourg Macedonia
## [81] Malta Montenegro
## [83] Netherlands Norway
## [85] Poland Portugal
## [87] Romania Serbia
## [89] Slovakia Slovenia
## [91] Spain Sweden
## [93] Switzerland Turkey
## [95] United Kingdom Eurasia
## [97] Armenia Azerbaijan
## [99] Belarus Estonia
## [101] Former U.S.S.R. Georgia
## [103] Kazakhstan Kyrgyzstan
## [105] Latvia Lithuania
## [107] Moldova Russia
## [109] Tajikistan Turkmenistan
## [111] Ukraine Uzbekistan
## [113] Middle East Bahrain
## [115] Iran Iraq
## [117] Israel Jordan
## [119] Kuwait Lebanon
## [121] Oman Palestine
## [123] Qatar Saudi Arabia
## [125] Syria United Arab Emirates
## [127] Yemen Africa
## [129] Algeria Angola
## [131] Benin Botswana
## [133] Burkina Faso Burundi
## [135] Cameroon Cape Verde
## [137] Central African Republic Chad
## [139] Comoros Congo (Brazzaville)
## [141] Congo (Kinshasa) Cote dIvoire (IvoryCoast)
## [143] Djibouti Egypt
## [145] Equatorial Guinea Eritrea
## [147] Ethiopia Gabon
## [149] Gambia, The Ghana
## [151] Guinea Guinea-Bissau
## [153] Kenya Lesotho
## [155] Liberia Libya
## [157] Madagascar Malawi
## [159] Mali Mauritania
## [161] Mauritius Morocco
## [163] Mozambique Namibia
## [165] Niger Nigeria
## [167] Reunion Rwanda
## [169] Saint Helena Sao Tome and Principe
## [171] Senegal Seychelles
## [173] Sierra Leone Somalia
## [175] South Africa Sudan
## [177] Swaziland Tanzania
## [179] Togo Tunisia
## [181] Uganda Western Sahara
## [183] Zambia Zimbabwe
## [185] Asia & Oceania Afghanistan
## [187] American Samoa Australia
## [189] Bangladesh Bhutan
## [191] Brunei Burma (Myanmar)
## [193] Cambodia China
## [195] Cook Islands Fiji
## [197] French Polynesia Guam
## [199] Hawaiian Trade Zone Hong Kong
## [201] India Indonesia
## [203] Japan Kiribati
## [205] Korea, North Korea, South
## [207] Laos Macau
## [209] Malaysia Maldives
## [211] Mongolia Nauru
## [213] Nepal New Caledonia
## [215] New Zealand Niue
## [217] Pakistan Papua New Guinea
## [219] Philippines Samoa
## [221] Singapore Solomon Islands
## [223] Sri Lanka Taiwan
## [225] Thailand Timor-Leste (East Timor)
## [227] Tonga U.S. Pacific Islands
## [229] Vanuatu Vietnam
## [231] Wake Island World
## 232 Levels: Afghanistan Africa Albania Algeria American Samoa ... Zimbabwe
"North America", "Central & South America", "Antarctica", "Europe", "Eurasia", "Middle East", "Africa", "Asia & Oceania", "World"
These are non-countries.
# Labels that denote aggregate regions rather than individual countries.
# NOTE(review): "World" is also an aggregate but is not listed here -- confirm
# that leaving it out of the regional subset is intentional.
noncountry <- c(
  "North America", "Central & South America", "Antarctica", "Europe",
  "Eurasia", "Middle East", "Africa", "Asia & Oceania"
)
# Row positions whose region is one of the aggregates above.
matches <- which(df_new$region %in% noncountry)
# Keep only those rows, then confirm which labels remain.
df_region <- df_new[matches, ]
unique(df_region$region)
## [1] North America Central & South America Antarctica
## [4] Europe Eurasia Middle East
## [7] Africa Asia & Oceania
## 232 Levels: Afghanistan Africa Albania Algeria American Samoa ... Zimbabwe
summary(df_region)
## region year coal_consumption
## Africa :30 Min. :1980 Min. : 0.0000
## Antarctica :30 1st Qu.:1987 1st Qu.: 0.4149
## Asia & Oceania :30 Median :1994 Median : 5.3251
## Central & South America:30 Mean :1994 Mean :11.8618
## Eurasia :30 3rd Qu.:2002 3rd Qu.:19.5980
## Europe :30 Max. :2009 Max. :91.2524
## (Other) :60
While coal consumption for almost all regions is decreasing, coal consumption in Asia & Oceania is increasing significantly.
# Line chart of coal consumption over time, one coloured line per region.
ggplot(df_region, aes(x = year, y = coal_consumption)) +
  geom_line(aes(color = region))