Create 3 sets of variable combinations with one column created by me (using mutate)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggthemes)
library(ggrepel)
# NOTE(review): machine-specific absolute path -- this script only runs where
# this directory exists. Kept as-is because later relative paths may rely on it.
setwd("C:/Users/kaitl/OneDrive/Documents/590_Working")

# Read the energy data with explicit column types
# (n = number, c = character; 14 columns in total).
# ".." is the file's placeholder for missing values, so it is declared in `na`
# and read as NA up front. Presumably these placeholders in the numeric columns
# are what trigger the "parsing issues" warning -- confirm with problems(energy).
energy <- read_delim(
  "./590_FinalData1.csv",
  delim = ",",
  col_types = "nccnncnnnnnnnn",
  na = c("", "NA", "..")
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
# Build the working copy `energy1`; the raw import in `energy` is left untouched.
# Replace the ".." placeholder with NA. Only character columns can contain the
# string "..", so the replacement is limited to them instead of comparing the
# whole data frame (numeric columns included) against a string.
energy1 <- energy %>%
  mutate(across(where(is.character), ~ na_if(.x, "..")))
First set: Response = output_perc_of_consumption, Explanatory = ren_energy_output, ren_energy_cons
This graph shows each country's renewable energy output divided by its renewable energy consumption, i.e. the share of energy a country provides relative to how much it actually consumes. Some countries must provide electricity, both renewable and nonrenewable, to others.
# #create 3 new columns for sets
# #perc of ren energy output of consumption (output/consumption)
# #output makes up % of the total consumption
# energy2 <- energy1 %>%
# mutate(output_perc_of_consumption = ren_energy_output / ren_energy_cons)
#
# ggplot(data = energy2, aes(x = country_name, y = output_perc_of_consumption))+
# geom_point()
#
# output_cons_se <- sd(ren_energy_cons)
#
#
# bootstrap <- function (x, func=mean, n_iter=10^4) {
# # empty vector to be filled with values from each iteration
# func_values <- c(NULL)
#
# # we simulate sampling `n_iter` times
# for (i in 1:n_iter) {
# # pull the sample
# x_sample <- sample(x, size = length(x), replace = TRUE)
#
# # add on this iteration's value to the collection
# func_values <- c(func_values, func(x_sample))
# }
#
# return(func_values)
# }
#
# output_cons_means <- bootstrap(ren_energy_cons)
#
# ggplot() +
# geom_histogram(mapping = aes(x = output_cons_means),
# color='white') +
# labs(title = "10K Bootstrapped Sample Means of `output_cons_means`",
# subtitle = "A *simulation* of the true sampling distribution",
# x = "Bootstrapped Sample Mean",
# y = "Number of Samples") +
# theme_minimal()
Second set: Response = urban_plus_rural_access, Explanatory = urban_electricity_access, rural_electricity_access.
The reported urban and rural electricity-access percentages are added together to show how many observations fall into each level of total access. The most common value is 200, meaning most countries are able to provide full access to everyone (both urban and rural). Keep in mind that some countries have missing values, so this is an estimate and the histogram is incomplete due to lack of data.
# Combined access score: urban electricity access plus rural electricity access
energy3 <- mutate(
  energy1,
  urban_plus_rural_access = urban_electricity_access + rural_electricity_access
)

# Histogram of the combined score, using ggplot2's default binning
ggplot(energy3) +
  geom_histogram(mapping = aes(x = urban_plus_rural_access))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1864 rows containing non-finite values (`stat_bin()`).
Third set: Response = historic_energy_consumption, Explanatory = year, country_name
This is the sum of total final energy consumption (TFEC) for each year and country. It shows how much energy each country consumed in each year for which data were recorded.
# Total final energy consumption (TFEC) summed within each year/country pair.
# NOTE(review): sum() propagates NA, so a group with any missing TFEC stays NA
# and is dropped by geom_point() with a warning. na.rm = TRUE is deliberately
# not used here because it would turn all-missing groups into 0.
energy4 <- energy1 %>%
  group_by(year, country_name) %>%
  summarise(
    historic_energy_consumption = sum(TFEC),
    .groups = "drop"
  )

# One point per year/country pair: consumption against year
ggplot(data = energy4, aes(x = year, y = historic_energy_consumption)) +
  geom_point()
## Warning: Removed 957 rows containing missing values (`geom_point()`).