library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggthemes)
library(ggrepel)
library(gapminder)
# NOTE(review): setwd() with an absolute, user-specific path makes this script
# non-portable. It is kept because relative paths elsewhere may depend on it;
# consider an RStudio project or here::here() instead.
setwd("C:/Users/kaitl/OneDrive/Documents/590_Working")

# Load the energy data with explicit column types (i = integer, c = character).
# The file appears to use ".." as its missing-value placeholder (it is blanked
# out right after loading). Declaring it in `na` lets readr parse it as NA up
# front instead of flagging each ".." in an integer column as a parsing issue.
energy <- read_delim(
  "./590_FinalData1.csv",
  delim = ",",
  col_types = "icciiciiiiiiii",
  na = c("", "NA", "..")
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
# Cleaned working copy: every cell equal to the ".." placeholder becomes NA,
# while the raw import in `energy` is left untouched.
energy1 <- replace(energy, energy == "..", NA)

These first few trials are to gain insight on the data.

United States, China, and India’s renewable energy consumption from 1990-2016:

# Subset to the three countries compared in this exploratory plot.
# Filtering on the bare column with %in% (rather than chained
# `energy1$country_name == ...` tests) keeps the condition tied to the data
# flowing through the pipe.
TrialEnergy <- energy1 |>
  filter(country_name %in% c("United States", "China", "India"))

# Linear trend of renewable energy consumption over time. The response goes on
# the left of `~` so the model is the same y ~ x fit that the dashed `lm`
# smoother draws below. With a single predictor, R-squared is identical in
# either direction, so the subtitle value is unaffected.
model <- lm(ren_energy_cons ~ year, data = TrialEnergy, na.action = na.omit)

rsquared <- summary(model)$r.squared

TrialEnergy |>
  ggplot(mapping = aes(x = year, y = ren_energy_cons)) +
  geom_point(mapping = aes(color = country_name)) +
  guides(color = guide_legend(title = "Country")) +
  # linear model
  geom_smooth(
    method = "lm", color = "gray", linetype = "dashed", se = FALSE
  ) +
  # best fit line (default smoother)
  geom_smooth(se = FALSE, color = "black") +
  labs(
    title = "Renewable Energy Consumption by Year",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3)),
    x = "Years 1990-2016",
    y = "Renewable Energy Consumption"
  ) +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 3 rows containing missing values (`geom_point()`).

United States, China, and India total electricity consumption from 1990-2016.

# Same three-country subset, this time for total electricity output.
# Filtering on the bare column with %in% keeps the condition tied to the data
# flowing through the pipe.
TrialTotalEnergy <- energy1 |>
  filter(country_name %in% c("United States", "China", "India"))

# Linear trend of total electricity output over time. The response goes on the
# left of `~` to match the y ~ x fit drawn by the dashed `lm` smoother below.
# With a single predictor, R-squared is the same in either direction.
model <- lm(total_elec_output ~ year, data = TrialTotalEnergy,
            na.action = na.omit)

rsquared <- summary(model)$r.squared

TrialTotalEnergy |>
  ggplot(mapping = aes(x = year, y = total_elec_output)) +
  geom_point(mapping = aes(color = country_name)) +
  guides(color = guide_legend(title = "Country")) +
  # linear model
  geom_smooth(
    method = "lm", color = "gray", linetype = "dashed", se = FALSE
  ) +
  # best fit line (default smoother)
  geom_smooth(se = FALSE, color = "black") +
  labs(
    title = "Total Electric Energy Consumption by Year",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3)),
    x = "Years 1990-2016",
    # The y-axis label names the column actually plotted (total_elec_output).
    y = "Total Electricity Output"
  ) +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 3 rows containing missing values (`geom_point()`).

Better representation of this time series, considering all other countries.

Unfinished, but I left this in to share ideas. I thought this would be a better implementation of a time series: I was hoping to see a slight oscillation with weather changes in countries (depending on their global position). It would be interesting to see how different countries use energy as the seasons change.

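# #update format of dates to be consistent
# # NOTE: strptime codes put the % first ("%d/%m/%y"); `year` is read as an
# # integer column, so confirm it really holds d/m/y strings before enabling.
# energy1$year <- as.Date(energy1$year , format = "%d/%m/%y")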
# 
# # create a tsibble of renewable energy consumption for the United States
# energy_ <- energy1 |>
#   filter(country_name == "United States") |>
#   select(year, ren_energy_cons) |>
#   distinct()
# 
# energy_ts <- as_tsibble(energy_, index=year) |>
#   index_by(date = date(year))
# 
# energy_ts

Summarizing and testing for groups of variables for large summary statistics. I was interested in being able to compare large pieces of data that include every country, but with so many countries not being able to report their data and NA values being left, I found those large statistics more difficult to work with, resulting in my ultimate hypothesis.

# Pull out three columns of interest and print their summary statistics to see
# how much of the data is missing.
dat <- select(
  energy1,
  country_name, ren_energy_cons, full_pop_electricity_access
)
summary(dat)
##  country_name       ren_energy_cons   full_pop_electricity_access
##  Length:6993        Min.   :      0   Length:6993                
##  Class :character   1st Qu.:      0   Class :character           
##  Mode  :character   Median :      0   Mode  :character           
##                     Mean   : 693191                              
##                     3rd Qu.: 205273                              
##                     Max.   :9145436                              
##                     NA's   :5810

Trying out new countries to try out different analyses: this was India’s renewable energy usage with a linear model and best fit line. I could easily filter for different countries by name, sometimes needing to refer to the Excel file to confirm full names such as “Russian Federation” instead of just “Russia.”

# Single-country subset for India, filtered on the bare column so the
# condition is evaluated against the data flowing through the pipe.
IndiaEnergy <- energy1 |>
  filter(country_name == "India")

# Linear trend of India's renewable energy consumption over time. The response
# is on the left of `~` to match the y ~ x fit drawn by the dashed smoother.
model <- lm(ren_energy_cons ~ year, data = IndiaEnergy, na.action = na.omit)

rsquared <- summary(model)$r.squared

IndiaEnergy |>
  # Plot the same column that the model, title and y label describe
  # (ren_energy_cons), so the points and the reported R-squared agree.
  ggplot(mapping = aes(x = year, y = ren_energy_cons)) +
  geom_point() +
  # linear model
  geom_smooth(
    method = "lm", color = "gray", linetype = "dashed", se = FALSE
  ) +
  # best fit line (default smoother)
  geom_smooth(se = FALSE, color = "green") +
  labs(
    title = "India Renewable Energy Consumption by Year",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3)),
    x = "Years 1990-2016",
    y = "Renewable Energy Consumption"
  ) +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).

Reported leading renewable countries by Climate Council.

https://www.climatecouncil.org.au/11-countries-leading-the-charge-on-renewable-energy/

Hypothesis: The top 11 countries reported by Climate Council have a higher ratio for Renewable Electricity Output/Total Electricity Output.

I chose countries based on availability of data.

# Countries named as renewable-energy leaders, restricted to those with data.
# %in% on the bare column replaces the long chain of `energy1$country_name ==`
# tests and keeps the filter tied to the data flowing through the pipe.
LeaderEnergy <- energy1 |>
  filter(country_name %in% c(
    "Uruguay", "Kenya", "Sweden", "Germany", "Iceland", "Costa Rica",
    "United Kingdom", "China", "Morocco", "New Zealand", "Norway"
  ))

# Linear trend of the renewable share (renewable output / total output) over
# time. Inside a formula `/` is the nesting operator (a / b means a + a:b), so
# the arithmetic ratio must be wrapped in I() to be treated as division.
model <- lm(I(ren_energy_output / total_elec_output) ~ year,
            data = LeaderEnergy, na.action = na.omit)

rsquared <- summary(model)$r.squared

LeaderEnergy |>
  ggplot(mapping = aes(x = year,
                       y = (ren_energy_output / total_elec_output))) +
  geom_line(mapping = aes(color = country_name)) +
  guides(color = guide_legend(title = "Reported Leader Countries")) +
  # best fit line (default smoother across all countries)
  geom_smooth(se = FALSE, color = "black") +
  labs(
    title = "Yearly Renewable Energy Output Share of Total Electricity Output",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3)),
    x = "Years 1990-2016",
    y = "Renewable Energy Output/Total Electricity Output"
  ) +
  theme_light()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 11 rows containing missing values (`geom_line()`).

# Add the renewable share as an explicit column and summarise every column;
# the mean of `ratio` is the figure compared across country groups.
test <- mutate(LeaderEnergy, ratio = ren_energy_output / total_elec_output)
summary(test)
##       year      country_name       country_code       rural_electricity_access
##  Min.   :1990   Length:297         Length:297         Min.   : 16.00          
##  1st Qu.:1996   Class :character   Class :character   1st Qu.:100.00          
##  Median :2003   Mode  :character   Mode  :character   Median :100.00          
##  Mean   :2003                                         Mean   : 99.49          
##  3rd Qu.:2010                                         3rd Qu.:100.00          
##  Max.   :2016                                         Max.   :100.00          
##                                                       NA's   :128             
##  total_population_electricity_access full_pop_electricity_access
##  Min.   : 16.00                      Length:297                 
##  1st Qu.:100.00                      Class :character           
##  Median :100.00                      Mode  :character           
##  Mean   : 97.89                                                 
##  3rd Qu.:100.00                                                 
##  Max.   :100.00                                                 
##  NA's   :123                                                    
##  urban_electricity_access energy_intensity ren_energy_output
##  Min.   : 95.00           Min.   :3        Min.   :    443  
##  1st Qu.:100.00           1st Qu.:3        1st Qu.:   5741  
##  Median :100.00           Median :3        Median :  17378  
##  Mean   : 99.97           Mean   :3        Mean   :  73937  
##  3rd Qu.:100.00           3rd Qu.:3        3rd Qu.:  80875  
##  Max.   :100.00           Max.   :3        Max.   :1398321  
##  NA's   :114              NA's   :296      NA's   :11       
##  ren_energy_outputshare_of_totaloutput ren_energy_cons  
##  Min.   : 6                            Min.   :  30395  
##  1st Qu.:23                            1st Qu.: 589657  
##  Median :51                            Median :8069989  
##  Mean   :49                            Mean   :5427883  
##  3rd Qu.:66                            3rd Qu.:8813803  
##  Max.   :99                            Max.   :9145436  
##  NA's   :292                           NA's   :254      
##  ren_energy_share_of_TFEC total_elec_output      TFEC             ratio        
##  Min.   : NA              Min.   :   3235   Min.   :1383134   Min.   :0.01628  
##  1st Qu.: NA              1st Qu.:   8882   1st Qu.:1390919   1st Qu.:0.16103  
##  Median : NA              Median :  40703   Median :1398705   Median :0.66487  
##  Mean   :NaN              Mean   : 345303   Mean   :1398705   Mean   :0.56792  
##  3rd Qu.: NA              3rd Qu.: 335154   3rd Qu.:1406490   3rd Qu.:0.94931  
##  Max.   : NA              Max.   :5844158   Max.   :1414275   Max.   :0.99989  
##  NA's   :297              NA's   :11        NA's   :295       NA's   :11
# Comparison group: selected countries in the Americas.
# %in% on the bare column replaces the chained `energy1$country_name ==` tests
# and keeps the filter tied to the data flowing through the pipe.
AmericasEnergy <- energy1 |>
  filter(country_name %in% c(
    "United States", "Canada", "Brazil", "Mexico", "Chile", "Argentina"
  ))

# Linear trend of the renewable share (renewable output / total output) over
# time. Inside a formula `/` is the nesting operator (a / b means a + a:b), so
# the arithmetic ratio must be wrapped in I() to be treated as division.
model <- lm(I(ren_energy_output / total_elec_output) ~ year,
            data = AmericasEnergy, na.action = na.omit)

rsquared <- summary(model)$r.squared

AmericasEnergy |>
  ggplot(mapping = aes(x = year,
                       y = (ren_energy_output / total_elec_output))) +
  geom_line(mapping = aes(color = country_name)) +
  guides(
    color = guide_legend(title = "American/Western Hemisphere Countries")
  ) +
  # best fit line (default smoother across all countries)
  geom_smooth(se = FALSE, color = "black") +
  labs(
    title = "Yearly Renewable Energy Output Share of Total Electricity Output",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3)),
    x = "Years 1990-2016",
    y = "Renewable Energy Output/Total Electricity Output"
  ) +
  theme_light()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 6 rows containing missing values (`geom_line()`).

# Renewable share per row for the Americas group, followed by a full column
# summary; the mean of `ratio` feeds the group comparison.
test <- AmericasEnergy |>
  mutate(ratio = ren_energy_output / total_elec_output)
summary(test)
##       year      country_name       country_code       rural_electricity_access
##  Min.   :1990   Length:162         Length:162         Min.   : 98.00          
##  1st Qu.:1996   Class :character   Class :character   1st Qu.:100.00          
##  Median :2003   Mode  :character   Mode  :character   Median :100.00          
##  Mean   :2003                                         Mean   : 99.94          
##  3rd Qu.:2010                                         3rd Qu.:100.00          
##  Max.   :2016                                         Max.   :100.00          
##                                                       NA's   :100             
##  total_population_electricity_access full_pop_electricity_access
##  Min.   : 90.00                      Length:162                 
##  1st Qu.:100.00                      Class :character           
##  Median :100.00                      Mode  :character           
##  Mean   : 99.82                                                 
##  3rd Qu.:100.00                                                 
##  Max.   :100.00                                                 
##  NA's   :100                                                    
##  urban_electricity_access energy_intensity ren_energy_output
##  Min.   : 99.00           Min.   : NA      Min.   :  9891   
##  1st Qu.:100.00           1st Qu.: NA      1st Qu.: 30513   
##  Median :100.00           Median : NA      Median :131730   
##  Mean   : 99.98           Mean   :NaN      Mean   :198217   
##  3rd Qu.:100.00           3rd Qu.: NA      3rd Qu.:360713   
##  Max.   :100.00           Max.   : NA      Max.   :568439   
##  NA's   :102              NA's   :162      NA's   :6        
##  ren_energy_outputshare_of_totaloutput ren_energy_cons  
##  Min.   : 9                            Min.   : 164464  
##  1st Qu.:35                            1st Qu.:1549460  
##  Median :61                            Median :2296502  
##  Mean   :53                            Mean   :2383782  
##  3rd Qu.:75                            3rd Qu.:3208168  
##  Max.   :89                            Max.   :5175231  
##  NA's   :159                           NA's   :76       
##  ren_energy_share_of_TFEC total_elec_output      TFEC         ratio        
##  Min.   :11               Min.   :  18372   Min.   : NA   Min.   :0.06784  
##  1st Qu.:11               1st Qu.:  91551   1st Qu.: NA   1st Qu.:0.17434  
##  Median :11               Median : 275569   Median : NA   Median :0.39899  
##  Mean   :11               Mean   : 880140   Mean   :NaN   Mean   :0.44113  
##  3rd Qu.:11               3rd Qu.: 596846   3rd Qu.: NA   3rd Qu.:0.62358  
##  Max.   :11               Max.   :4354363   Max.   : NA   Max.   :0.95405  
##  NA's   :161              NA's   :6         NA's   :162   NA's   :6
# Comparison group: selected European countries.
# %in% on the bare column replaces the chained `energy1$country_name ==` tests
# and keeps the filter tied to the data flowing through the pipe.
EuropeanEnergy <- energy1 |>
  filter(country_name %in% c(
    "Norway", "France", "Sweden", "Germany", "United Kingdom"
  ))

# Linear trend of the renewable share (renewable output / total output) over
# time. Inside a formula `/` is the nesting operator (a / b means a + a:b), so
# the arithmetic ratio must be wrapped in I() to be treated as division.
model <- lm(I(ren_energy_output / total_elec_output) ~ year,
            data = EuropeanEnergy, na.action = na.omit)

rsquared <- summary(model)$r.squared

EuropeanEnergy |>
  ggplot(mapping = aes(x = year,
                       y = (ren_energy_output / total_elec_output))) +
  geom_line(mapping = aes(color = country_name)) +
  guides(color = guide_legend(title = "European Countries")) +
  # best fit line (default smoother across all countries)
  geom_smooth(se = FALSE, color = "black") +
  labs(
    title = "Yearly Renewable Energy Output Share of Total Electricity Output",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3)),
    x = "Years 1990-2016",
    y = "Renewable Energy Output/Total Electricity Output"
  ) +
  theme_light()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 5 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 5 rows containing missing values (`geom_line()`).

# Renewable share per row for the European group, then summary statistics for
# all columns (the mean of `ratio` is used in the final comparison).
european_with_ratio <- NULL
test <- mutate(
  EuropeanEnergy,
  ratio = ren_energy_output / total_elec_output
)
rm(european_with_ratio)
summary(test)
##       year      country_name       country_code       rural_electricity_access
##  Min.   :1990   Length:135         Length:135         Min.   :100             
##  1st Qu.:1996   Class :character   Class :character   1st Qu.:100             
##  Median :2003   Mode  :character   Mode  :character   Median :100             
##  Mean   :2003                                         Mean   :100             
##  3rd Qu.:2010                                         3rd Qu.:100             
##  Max.   :2016                                         Max.   :100             
##                                                       NA's   :1               
##  total_population_electricity_access full_pop_electricity_access
##  Min.   :100                         Length:135                 
##  1st Qu.:100                         Class :character           
##  Median :100                         Mode  :character           
##  Mean   :100                                                    
##  3rd Qu.:100                                                    
##  Max.   :100                                                    
##  NA's   :1                                                      
##  urban_electricity_access energy_intensity ren_energy_output
##  Min.   :100              Min.   : NA      Min.   :  5323   
##  1st Qu.:100              1st Qu.: NA      1st Qu.: 36080   
##  Median :100              Median : NA      Median : 71392   
##  Mean   :100              Mean   :NaN      Mean   : 71724   
##  3rd Qu.:100              3rd Qu.: NA      3rd Qu.:101409   
##  Max.   :100              Max.   : NA      Max.   :187366   
##                           NA's   :135      NA's   :5        
##  ren_energy_outputshare_of_totaloutput ren_energy_cons  
##  Min.   :23                            Min.   : 102465  
##  1st Qu.:30                            1st Qu.: 441113  
##  Median :37                            Median : 574613  
##  Mean   :37                            Mean   : 648924  
##  3rd Qu.:44                            3rd Qu.: 966484  
##  Max.   :51                            Max.   :1183042  
##  NA's   :133                           NA's   :120      
##  ren_energy_share_of_TFEC total_elec_output      TFEC             ratio        
##  Min.   : NA              Min.   :104698    Min.   :1383134   Min.   :0.01628  
##  1st Qu.: NA              1st Qu.:145948    1st Qu.:1390919   1st Qu.:0.06996  
##  Median : NA              Median :362798    Median :1398705   Median :0.15656  
##  Mean   :NaN              Mean   :349651    Mean   :1398705   Mean   :0.36050  
##  3rd Qu.: NA              3rd Qu.:547921    3rd Qu.:1406490   3rd Qu.:0.55054  
##  Max.   : NA              Max.   :640967    Max.   :1414275   Max.   :0.99817  
##  NA's   :135              NA's   :5         NA's   :133       NA's   :5
# Comparison group: selected Asian countries.
# %in% on the bare column replaces the chained `energy1$country_name ==` tests
# and keeps the filter tied to the data flowing through the pipe.
AsianEnergy <- energy1 |>
  filter(country_name %in% c(
    "China", "Russian Federation", "India", "Japan", "Saudi Arabia"
  ))

# Linear trend of the renewable share (renewable output / total output) over
# time. Inside a formula `/` is the nesting operator (a / b means a + a:b), so
# the arithmetic ratio must be wrapped in I() to be treated as division.
model <- lm(I(ren_energy_output / total_elec_output) ~ year,
            data = AsianEnergy, na.action = na.omit)

rsquared <- summary(model)$r.squared

AsianEnergy |>
  ggplot(mapping = aes(x = year,
                       y = (ren_energy_output / total_elec_output))) +
  geom_line(mapping = aes(color = country_name)) +
  guides(color = guide_legend(title = "Asian Countries")) +
  # best fit line (default smoother across all countries)
  geom_smooth(se = FALSE, color = "black") +
  labs(
    title = "Yearly Renewable Energy Output Share of Total Electricity Output",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3)),
    x = "Years 1990-2016",
    y = "Renewable Energy Output/Total Electricity Output"
  ) +
  theme_light()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 5 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 5 rows containing missing values (`geom_line()`).

# Renewable share per row for the Asian group, then summary statistics for all
# columns (the mean of `ratio` is used in the final comparison).
test <- AsianEnergy |>
  mutate(ratio = ren_energy_output / total_elec_output)
summary(test)
##       year      country_name       country_code       rural_electricity_access
##  Min.   :1990   Length:135         Length:135         Min.   : 66.0           
##  1st Qu.:1996   Class :character   Class :character   1st Qu.:100.0           
##  Median :2003   Mode  :character   Mode  :character   Median :100.0           
##  Mean   :2003                                         Mean   : 99.5           
##  3rd Qu.:2010                                         3rd Qu.:100.0           
##  Max.   :2016                                         Max.   :100.0           
##                                                       NA's   :63              
##  total_population_electricity_access full_pop_electricity_access
##  Min.   : 75.00                      Length:135                 
##  1st Qu.:100.00                      Class :character           
##  Median :100.00                      Mode  :character           
##  Mean   : 99.49                                                 
##  3rd Qu.:100.00                                                 
##  Max.   :100.00                                                 
##  NA's   :63                                                     
##  urban_electricity_access energy_intensity ren_energy_output
##  Min.   : 94.00           Min.   : NA      Min.   :      0  
##  1st Qu.:100.00           1st Qu.: NA      1st Qu.:  74123  
##  Median :100.00           Median : NA      Median : 124161  
##  Mean   : 99.92           Mean   :NaN      Mean   : 171445  
##  3rd Qu.:100.00           3rd Qu.: NA      3rd Qu.: 174517  
##  Max.   :100.00           Max.   : NA      Max.   :1398321  
##  NA's   :59               NA's   :135      NA's   :5        
##  ren_energy_outputshare_of_totaloutput ren_energy_cons  
##  Min.   :0                             Min.   : 446223  
##  1st Qu.:0                             1st Qu.:6172063  
##  Median :0                             Median :7655961  
##  Mean   :0                             Mean   :6997571  
##  3rd Qu.:0                             3rd Qu.:8725501  
##  Max.   :0                             Max.   :9145436  
##  NA's   :109                           NA's   :78       
##  ren_energy_share_of_TFEC total_elec_output      TFEC         
##  Min.   : NA              Min.   :  69208   Min.   :10856619  
##  1st Qu.: NA              1st Qu.: 450443   1st Qu.:12117877  
##  Median : NA              Median : 929075   Median :13379136  
##  Mean   :NaN              Mean   :1073553   Mean   :13379136  
##  3rd Qu.: NA              3rd Qu.:1080795   3rd Qu.:14640394  
##  Max.   : NA              Max.   :5844158   Max.   :15901652  
##  NA's   :135              NA's   :5         NA's   :133       
##      ratio        
##  Min.   :0.00000  
##  1st Qu.:0.09123  
##  Median :0.15714  
##  Mean   :0.12575  
##  3rd Qu.:0.17638  
##  Max.   :0.24489  
##  NA's   :5

The mean renewable shares for Europe, the Americas, Asia, and the top eleven reported countries were 36.1%, 44.1%, 12.6%, and 56.8%, respectively. Thus, the reported top 11 countries do indeed carry a higher mean for renewable electricity output as a share of total electricity output.