library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggthemes)
library(ggrepel)
library(gapminder)
# Directory that holds the project data. The path is built explicitly with
# file.path() instead of calling setwd(), so reading the file does not change
# the session's working directory as a side effect.
data_dir <- "C:/Users/kaitl/OneDrive/Documents/590_Working"

# update data types of dataframe
# The file marks missing values with "..". Declaring that marker in `na` makes
# readr read it as NA directly instead of failing to parse it in the integer
# columns (a likely source of the "parsing issues" warning).
# NOTE(review): every numeric column is read as integer ("i"). If the file
# holds decimal values (shares, intensities), those cells would fail integer
# parsing and become NA -- check problems(energy) and consider "d" for them.
energy <- read_delim(
  file.path(data_dir, "590_FinalData1.csv"),
  delim = ",",
  col_types = "icciiciiiiiiii",
  na = c("", "NA", "..")
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
# Working copy of the raw data in which the ".." placeholder is turned into
# NA. Only character columns can hold the literal string "..", so the
# replacement is applied to those columns.
energy1 <- energy |>
  mutate(across(where(is.character), \(col) na_if(col, "..")))
These first few trials are meant to gain insight into the data.
United States, China, and India’s renewable energy consumption from 1990-2016:
# Subset to the three countries compared in this first exploratory plot.
# Column names are used bare inside filter() so the condition is evaluated on
# the piped data rather than on the outer `energy1` object.
TrialEnergy <- energy1 |>
  filter(country_name %in% c("United States", "China", "India"))

# Fit the relationship the plot draws: consumption as a function of year.
# With a single predictor, R-squared is the same whichever variable is the
# response, so the value shown in the subtitle does not change.
model <- lm(ren_energy_cons ~ year, data = TrialEnergy, na.action = na.omit)
rsquared <- summary(model)$r.squared

TrialEnergy |>
  ggplot(mapping = aes(x = year, y = ren_energy_cons)) +
  geom_point(mapping = aes(color = country_name)) +
  guides(color = guide_legend(title = "Country")) +
  # linear model
  geom_smooth(
    method = "lm", color = "gray", linetype = "dashed", se = FALSE
  ) +
  # best fit line (smoothing method left at geom_smooth()'s default)
  geom_smooth(se = FALSE, color = "black") +
  labs(
    title = "Renewable Energy Consumption by Year",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3)),
    x = "Years 1990-2016",
    y = "Renewable Energy Consumption"
  ) +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 3 rows containing missing values (`geom_point()`).
United States, China, and India total electricity consumption from 1990-2016.
# Same three-country subset, this time for total electricity output.
# Column names are used bare inside filter() so the condition is evaluated on
# the piped data rather than on the outer `energy1` object.
TrialTotalEnergy <- energy1 |>
  filter(country_name %in% c("United States", "China", "India"))

# Fit the relationship the plot draws: output as a function of year.
# With a single predictor, R-squared is the same whichever variable is the
# response, so the value shown in the subtitle does not change.
model <- lm(
  total_elec_output ~ year,
  data = TrialTotalEnergy,
  na.action = na.omit
)
rsquared <- summary(model)$r.squared

TrialTotalEnergy |>
  ggplot(mapping = aes(x = year, y = total_elec_output)) +
  geom_point(mapping = aes(color = country_name)) +
  guides(color = guide_legend(title = "Country")) +
  # linear model
  geom_smooth(
    method = "lm", color = "gray", linetype = "dashed", se = FALSE
  ) +
  # best fit line (smoothing method left at geom_smooth()'s default)
  geom_smooth(se = FALSE, color = "black") +
  labs(
    title = "Total Electric Energy Consumption by Year",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3)),
    x = "Years 1990-2016",
    # The y axis shows total_elec_output, so the label names that variable
    # rather than renewable consumption.
    y = "Total Electricity Output"
  ) +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 3 rows containing missing values (`geom_point()`).
Better representation of this time series, considering all other countries.
This section is unfinished, but I left it in to share ideas: I thought this would be a better implementation of a time series. I was hoping to see a slight oscillation with weather changes in countries (depending on their global position). It would be interesting to see how different countries use energy as the seasons change.
# #update format of dates to be consistent
# energy1$year <- as.Date(energy1$year , format = "%d/%m/%y")
#
# # create a tsibble of renewable energy consumption for the United States
# energy_ <- energy1 |>
# filter(country_name == "United States") |>
# select(year, ren_energy_cons) |>
# distinct()
#
# energy_ts <- as_tsibble(energy_, index=year) |>
# index_by(date = date(year))
#
# energy_ts
Summarizing and testing groups of variables to produce large summary statistics. I was interested in being able to compare large pieces of data that include every country, but because so many countries were not able to report their data, leaving NA values, I found those large statistics more difficult to work with, which led to my ultimate hypothesis.
# Pull out the three columns of interest and print a per-column summary.
dat <- select(
  energy1,
  country_name,
  ren_energy_cons,
  full_pop_electricity_access
)
summary(dat)
## country_name ren_energy_cons full_pop_electricity_access
## Length:6993 Min. : 0 Length:6993
## Class :character 1st Qu.: 0 Class :character
## Mode :character Median : 0 Mode :character
## Mean : 693191
## 3rd Qu.: 205273
## Max. :9145436
## NA's :5810
Trying out new countries to test different analyses: this was India’s renewable energy usage with a linear model and best fit line. I could easily filter for different countries by name, though I sometimes needed to refer to the Excel file to confirm full names, such as “Russian Federation” instead of just “Russia.”
# Single-country subset for India.
IndiaEnergy <- energy1 |>
  filter(country_name == "India")

# Fit renewable consumption as a function of year. With a single predictor,
# R-squared is the same whichever variable is the response.
model <- lm(ren_energy_cons ~ year, data = IndiaEnergy, na.action = na.omit)
rsquared <- summary(model)$r.squared

IndiaEnergy |>
  # The title, y label and R-squared subtitle all describe renewable energy
  # consumption, so that variable (not total_elec_output) goes on the y axis.
  ggplot(mapping = aes(x = year, y = ren_energy_cons)) +
  geom_point() +
  # linear model
  geom_smooth(
    method = "lm", color = "gray", linetype = "dashed", se = FALSE
  ) +
  # best fit line (smoothing method left at geom_smooth()'s default)
  geom_smooth(se = FALSE, color = "green") +
  labs(
    title = "India Renewable Energy Consumption by Year",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3)),
    x = "Years 1990-2016",
    y = "Renewable Energy Consumption"
  ) +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
Reported leading renewable countries by Climate Council.
https://www.climatecouncil.org.au/11-countries-leading-the-charge-on-renewable-energy/
Hypothesis: The top 11 countries reported by Climate Council have a higher ratio for Renewable Electricity Output/Total Electricity Output.
I chose countries based on availability of data.
# Countries listed as renewable leaders in the Climate Council article.
# Column names are used bare inside filter() so the condition is evaluated on
# the piped data rather than on the outer `energy1` object.
LeaderEnergy <- energy1 |>
  filter(
    country_name %in% c(
      "Uruguay", "Kenya", "Sweden", "Germany", "Iceland", "Costa Rica",
      "United Kingdom", "China", "Morocco", "New Zealand", "Norway"
    )
  )

# Linear fit of the renewable share of total output against year.
# Inside a formula, `/` is the nesting operator (a / b expands to a + a:b),
# even when wrapped in parentheses, so the arithmetic ratio must be protected
# with I() for the model to regress on the actual share.
model <- lm(
  I(ren_energy_output / total_elec_output) ~ year,
  data = LeaderEnergy,
  na.action = na.omit
)
rsquared <- summary(model)$r.squared

LeaderEnergy |>
  ggplot(
    mapping = aes(x = year, y = (ren_energy_output / total_elec_output))
  ) +
  geom_line(mapping = aes(color = country_name)) +
  guides(color = guide_legend(title = "Reported Leader Countries")) +
  # best fit line across all countries in the subset
  geom_smooth(se = FALSE, color = "black") +
  labs(
    title = "Yearly Renewable Energy Output Share of Total Electricity Output",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3)),
    x = "Years 1990-2016",
    y = "Renewable Energy Output/Total Electricity Output"
  ) +
  theme_light()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 11 rows containing missing values (`geom_line()`).
# Add the renewable share of total electricity output as `ratio`, then print
# a summary of every column for the leader-country subset.
test <- mutate(LeaderEnergy, ratio = ren_energy_output / total_elec_output)
summary(test)
## year country_name country_code rural_electricity_access
## Min. :1990 Length:297 Length:297 Min. : 16.00
## 1st Qu.:1996 Class :character Class :character 1st Qu.:100.00
## Median :2003 Mode :character Mode :character Median :100.00
## Mean :2003 Mean : 99.49
## 3rd Qu.:2010 3rd Qu.:100.00
## Max. :2016 Max. :100.00
## NA's :128
## total_population_electricity_access full_pop_electricity_access
## Min. : 16.00 Length:297
## 1st Qu.:100.00 Class :character
## Median :100.00 Mode :character
## Mean : 97.89
## 3rd Qu.:100.00
## Max. :100.00
## NA's :123
## urban_electricity_access energy_intensity ren_energy_output
## Min. : 95.00 Min. :3 Min. : 443
## 1st Qu.:100.00 1st Qu.:3 1st Qu.: 5741
## Median :100.00 Median :3 Median : 17378
## Mean : 99.97 Mean :3 Mean : 73937
## 3rd Qu.:100.00 3rd Qu.:3 3rd Qu.: 80875
## Max. :100.00 Max. :3 Max. :1398321
## NA's :114 NA's :296 NA's :11
## ren_energy_outputshare_of_totaloutput ren_energy_cons
## Min. : 6 Min. : 30395
## 1st Qu.:23 1st Qu.: 589657
## Median :51 Median :8069989
## Mean :49 Mean :5427883
## 3rd Qu.:66 3rd Qu.:8813803
## Max. :99 Max. :9145436
## NA's :292 NA's :254
## ren_energy_share_of_TFEC total_elec_output TFEC ratio
## Min. : NA Min. : 3235 Min. :1383134 Min. :0.01628
## 1st Qu.: NA 1st Qu.: 8882 1st Qu.:1390919 1st Qu.:0.16103
## Median : NA Median : 40703 Median :1398705 Median :0.66487
## Mean :NaN Mean : 345303 Mean :1398705 Mean :0.56792
## 3rd Qu.: NA 3rd Qu.: 335154 3rd Qu.:1406490 3rd Qu.:0.94931
## Max. : NA Max. :5844158 Max. :1414275 Max. :0.99989
## NA's :297 NA's :11 NA's :295 NA's :11
# Comparison group: selected countries from the Americas.
# Column names are used bare inside filter() so the condition is evaluated on
# the piped data rather than on the outer `energy1` object.
AmericasEnergy <- energy1 |>
  filter(
    country_name %in% c(
      "United States", "Canada", "Brazil", "Mexico", "Chile", "Argentina"
    )
  )

# Linear fit of the renewable share of total output against year.
# Inside a formula, `/` is the nesting operator (a / b expands to a + a:b),
# even when wrapped in parentheses, so the arithmetic ratio must be protected
# with I() for the model to regress on the actual share.
model <- lm(
  I(ren_energy_output / total_elec_output) ~ year,
  data = AmericasEnergy,
  na.action = na.omit
)
rsquared <- summary(model)$r.squared

AmericasEnergy |>
  ggplot(
    mapping = aes(x = year, y = (ren_energy_output / total_elec_output))
  ) +
  geom_line(mapping = aes(color = country_name)) +
  guides(
    color = guide_legend(title = "American/Western Hemisphere Countries")
  ) +
  # best fit line across all countries in the subset
  geom_smooth(se = FALSE, color = "black") +
  labs(
    title = "Yearly Renewable Energy Output Share of Total Electricity Output",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3)),
    x = "Years 1990-2016",
    y = "Renewable Energy Output/Total Electricity Output"
  ) +
  theme_light()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 6 rows containing missing values (`geom_line()`).
# Add the renewable share of total electricity output as `ratio`, then print
# a summary of every column for the Americas subset.
test <- mutate(AmericasEnergy, ratio = ren_energy_output / total_elec_output)
summary(test)
## year country_name country_code rural_electricity_access
## Min. :1990 Length:162 Length:162 Min. : 98.00
## 1st Qu.:1996 Class :character Class :character 1st Qu.:100.00
## Median :2003 Mode :character Mode :character Median :100.00
## Mean :2003 Mean : 99.94
## 3rd Qu.:2010 3rd Qu.:100.00
## Max. :2016 Max. :100.00
## NA's :100
## total_population_electricity_access full_pop_electricity_access
## Min. : 90.00 Length:162
## 1st Qu.:100.00 Class :character
## Median :100.00 Mode :character
## Mean : 99.82
## 3rd Qu.:100.00
## Max. :100.00
## NA's :100
## urban_electricity_access energy_intensity ren_energy_output
## Min. : 99.00 Min. : NA Min. : 9891
## 1st Qu.:100.00 1st Qu.: NA 1st Qu.: 30513
## Median :100.00 Median : NA Median :131730
## Mean : 99.98 Mean :NaN Mean :198217
## 3rd Qu.:100.00 3rd Qu.: NA 3rd Qu.:360713
## Max. :100.00 Max. : NA Max. :568439
## NA's :102 NA's :162 NA's :6
## ren_energy_outputshare_of_totaloutput ren_energy_cons
## Min. : 9 Min. : 164464
## 1st Qu.:35 1st Qu.:1549460
## Median :61 Median :2296502
## Mean :53 Mean :2383782
## 3rd Qu.:75 3rd Qu.:3208168
## Max. :89 Max. :5175231
## NA's :159 NA's :76
## ren_energy_share_of_TFEC total_elec_output TFEC ratio
## Min. :11 Min. : 18372 Min. : NA Min. :0.06784
## 1st Qu.:11 1st Qu.: 91551 1st Qu.: NA 1st Qu.:0.17434
## Median :11 Median : 275569 Median : NA Median :0.39899
## Mean :11 Mean : 880140 Mean :NaN Mean :0.44113
## 3rd Qu.:11 3rd Qu.: 596846 3rd Qu.: NA 3rd Qu.:0.62358
## Max. :11 Max. :4354363 Max. : NA Max. :0.95405
## NA's :161 NA's :6 NA's :162 NA's :6
# Comparison group: selected European countries.
# Column names are used bare inside filter() so the condition is evaluated on
# the piped data rather than on the outer `energy1` object.
EuropeanEnergy <- energy1 |>
  filter(
    country_name %in% c(
      "Norway", "France", "Sweden", "Germany", "United Kingdom"
    )
  )

# Linear fit of the renewable share of total output against year.
# Inside a formula, `/` is the nesting operator (a / b expands to a + a:b),
# even when wrapped in parentheses, so the arithmetic ratio must be protected
# with I() for the model to regress on the actual share.
model <- lm(
  I(ren_energy_output / total_elec_output) ~ year,
  data = EuropeanEnergy,
  na.action = na.omit
)
rsquared <- summary(model)$r.squared

EuropeanEnergy |>
  ggplot(
    mapping = aes(x = year, y = (ren_energy_output / total_elec_output))
  ) +
  geom_line(mapping = aes(color = country_name)) +
  guides(color = guide_legend(title = "European Countries")) +
  # best fit line across all countries in the subset
  geom_smooth(se = FALSE, color = "black") +
  labs(
    title = "Yearly Renewable Energy Output Share of Total Electricity Output",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3)),
    x = "Years 1990-2016",
    y = "Renewable Energy Output/Total Electricity Output"
  ) +
  theme_light()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 5 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 5 rows containing missing values (`geom_line()`).
# Add the renewable share of total electricity output as `ratio`, then print
# a summary of every column for the European subset.
test <- mutate(EuropeanEnergy, ratio = ren_energy_output / total_elec_output)
summary(test)
## year country_name country_code rural_electricity_access
## Min. :1990 Length:135 Length:135 Min. :100
## 1st Qu.:1996 Class :character Class :character 1st Qu.:100
## Median :2003 Mode :character Mode :character Median :100
## Mean :2003 Mean :100
## 3rd Qu.:2010 3rd Qu.:100
## Max. :2016 Max. :100
## NA's :1
## total_population_electricity_access full_pop_electricity_access
## Min. :100 Length:135
## 1st Qu.:100 Class :character
## Median :100 Mode :character
## Mean :100
## 3rd Qu.:100
## Max. :100
## NA's :1
## urban_electricity_access energy_intensity ren_energy_output
## Min. :100 Min. : NA Min. : 5323
## 1st Qu.:100 1st Qu.: NA 1st Qu.: 36080
## Median :100 Median : NA Median : 71392
## Mean :100 Mean :NaN Mean : 71724
## 3rd Qu.:100 3rd Qu.: NA 3rd Qu.:101409
## Max. :100 Max. : NA Max. :187366
## NA's :135 NA's :5
## ren_energy_outputshare_of_totaloutput ren_energy_cons
## Min. :23 Min. : 102465
## 1st Qu.:30 1st Qu.: 441113
## Median :37 Median : 574613
## Mean :37 Mean : 648924
## 3rd Qu.:44 3rd Qu.: 966484
## Max. :51 Max. :1183042
## NA's :133 NA's :120
## ren_energy_share_of_TFEC total_elec_output TFEC ratio
## Min. : NA Min. :104698 Min. :1383134 Min. :0.01628
## 1st Qu.: NA 1st Qu.:145948 1st Qu.:1390919 1st Qu.:0.06996
## Median : NA Median :362798 Median :1398705 Median :0.15656
## Mean :NaN Mean :349651 Mean :1398705 Mean :0.36050
## 3rd Qu.: NA 3rd Qu.:547921 3rd Qu.:1406490 3rd Qu.:0.55054
## Max. : NA Max. :640967 Max. :1414275 Max. :0.99817
## NA's :135 NA's :5 NA's :133 NA's :5
# Comparison group: selected Asian countries.
# Column names are used bare inside filter() so the condition is evaluated on
# the piped data rather than on the outer `energy1` object.
AsianEnergy <- energy1 |>
  filter(
    country_name %in% c(
      "China", "Russian Federation", "India", "Japan", "Saudi Arabia"
    )
  )

# Linear fit of the renewable share of total output against year.
# Inside a formula, `/` is the nesting operator (a / b expands to a + a:b),
# even when wrapped in parentheses, so the arithmetic ratio must be protected
# with I() for the model to regress on the actual share.
model <- lm(
  I(ren_energy_output / total_elec_output) ~ year,
  data = AsianEnergy,
  na.action = na.omit
)
rsquared <- summary(model)$r.squared

AsianEnergy |>
  ggplot(
    mapping = aes(x = year, y = (ren_energy_output / total_elec_output))
  ) +
  geom_line(mapping = aes(color = country_name)) +
  guides(color = guide_legend(title = "Asian Countries")) +
  # best fit line across all countries in the subset
  geom_smooth(se = FALSE, color = "black") +
  labs(
    title = "Yearly Renewable Energy Output Share of Total Electricity Output",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3)),
    x = "Years 1990-2016",
    y = "Renewable Energy Output/Total Electricity Output"
  ) +
  theme_light()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 5 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 5 rows containing missing values (`geom_line()`).
# Add the renewable share of total electricity output as `ratio`, then print
# a summary of every column for the Asian subset.
test <- mutate(AsianEnergy, ratio = ren_energy_output / total_elec_output)
summary(test)
## year country_name country_code rural_electricity_access
## Min. :1990 Length:135 Length:135 Min. : 66.0
## 1st Qu.:1996 Class :character Class :character 1st Qu.:100.0
## Median :2003 Mode :character Mode :character Median :100.0
## Mean :2003 Mean : 99.5
## 3rd Qu.:2010 3rd Qu.:100.0
## Max. :2016 Max. :100.0
## NA's :63
## total_population_electricity_access full_pop_electricity_access
## Min. : 75.00 Length:135
## 1st Qu.:100.00 Class :character
## Median :100.00 Mode :character
## Mean : 99.49
## 3rd Qu.:100.00
## Max. :100.00
## NA's :63
## urban_electricity_access energy_intensity ren_energy_output
## Min. : 94.00 Min. : NA Min. : 0
## 1st Qu.:100.00 1st Qu.: NA 1st Qu.: 74123
## Median :100.00 Median : NA Median : 124161
## Mean : 99.92 Mean :NaN Mean : 171445
## 3rd Qu.:100.00 3rd Qu.: NA 3rd Qu.: 174517
## Max. :100.00 Max. : NA Max. :1398321
## NA's :59 NA's :135 NA's :5
## ren_energy_outputshare_of_totaloutput ren_energy_cons
## Min. :0 Min. : 446223
## 1st Qu.:0 1st Qu.:6172063
## Median :0 Median :7655961
## Mean :0 Mean :6997571
## 3rd Qu.:0 3rd Qu.:8725501
## Max. :0 Max. :9145436
## NA's :109 NA's :78
## ren_energy_share_of_TFEC total_elec_output TFEC
## Min. : NA Min. : 69208 Min. :10856619
## 1st Qu.: NA 1st Qu.: 450443 1st Qu.:12117877
## Median : NA Median : 929075 Median :13379136
## Mean :NaN Mean :1073553 Mean :13379136
## 3rd Qu.: NA 3rd Qu.:1080795 3rd Qu.:14640394
## Max. : NA Max. :5844158 Max. :15901652
## NA's :135 NA's :5 NA's :133
## ratio
## Min. :0.00000
## 1st Qu.:0.09123
## Median :0.15714
## Mean :0.12575
## 3rd Qu.:0.17638
## Max. :0.24489
## NA's :5
The mean ratios for Europe, the Americas, Asia, and the top eleven reported countries were 36.1%, 44.1%, 12.6%, and 56.8%, respectively. Thus, the reported top 11 countries do indeed carry a higher mean for renewable electricity output over total electricity output.