Question 1
data(cars)
head(cars)
## speed dist
## 1 4 2
## 2 4 10
## 3 7 4
## 4 7 22
## 5 8 16
## 6 9 10
median(cars[, 1])
## [1] 15
Question 2
library(jsonlite)
Question 3
Analyzing Global Trends in Renewable Energy Investment and COâ‚‚
Emissions
Research Questions:
1. How have renewable energy investments changed across different
countries over the past 10 years?
2. Is there a measurable relationship between renewable energy
consumption and COâ‚‚ emissions?
3. Which regions are leading in renewable energy development, and
how does this correlate with their GDP?
Data Cleaning
check for missing values
sum(is.na(energy_data))
## [1] 1320
apply(is.na(energy_data), 2, sum)
## country iso2c iso3c
## 0 0 0
## year renewable_energy gdp
## 0 563 96
## renewable_electricity
## 661
library(stringr)
str_detect(energy_data,"NA")
## Warning in stri_detect_regex(string, pattern, negate = negate, opts_regex =
## opts(pattern)): argument is not an atomic vector; coercing
## [1] FALSE TRUE TRUE FALSE TRUE TRUE TRUE
Fill in for missing values
ED_complete <-energy_data[-which(is.na(energy_data$sources)),]
str(ED_complete)
## 'data.frame': 0 obs. of 7 variables:
## $ country : chr
## $ iso2c : chr
## $ iso3c : chr
## $ year : int
## $ renewable_energy : num
## $ gdp : num
## $ renewable_electricity: num
ED_complete1 <- energy_data[complete.cases(energy_data),]
str(ED_complete1)
## 'data.frame': 2152 obs. of 7 variables:
## $ country : chr "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
## $ iso2c : chr "AF" "AF" "AF" "AF" ...
## $ iso3c : chr "AFG" "AFG" "AFG" "AFG" ...
## $ year : int 2013 2014 2015 2016 2017 2018 2019 2020 2021 2013 ...
## $ renewable_energy : num 16.9 19.1 17.7 20.2 19.5 ...
## $ gdp : num 2.01e+10 2.05e+10 1.91e+10 1.81e+10 1.88e+10 ...
## $ renewable_electricity: num 81 85.5 87.6 87.8 86.9 ...
ED_complete2 <- na.omit(energy_data)
str(ED_complete2)
## 'data.frame': 2152 obs. of 7 variables:
## $ country : chr "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
## $ iso2c : chr "AF" "AF" "AF" "AF" ...
## $ iso3c : chr "AFG" "AFG" "AFG" "AFG" ...
## $ year : int 2013 2014 2015 2016 2017 2018 2019 2020 2021 2013 ...
## $ renewable_energy : num 16.9 19.1 17.7 20.2 19.5 ...
## $ gdp : num 2.01e+10 2.05e+10 1.91e+10 1.81e+10 1.88e+10 ...
## $ renewable_electricity: num 81 85.5 87.6 87.8 86.9 ...
## - attr(*, "na.action")= 'omit' Named int [1:774] 10 11 20 21 22 31 32 33 34 35 ...
## ..- attr(*, "names")= chr [1:774] "10" "11" "20" "21" ...
Research Question 1
energy_trend <- energy_data %>%
group_by(country) %>%
summarise(change = last(renewable_energy) - first(renewable_energy))
energy_trend %>%
arrange(desc(change)) %>%
slice(1:10)
## # A tibble: 10 × 2
## country change
## <chr> <dbl>
## 1 Afghanistan NA
## 2 Africa Eastern and Southern NA
## 3 Africa Western and Central NA
## 4 Albania NA
## 5 Algeria NA
## 6 American Samoa NA
## 7 Andorra NA
## 8 Angola NA
## 9 Antigua and Barbuda NA
## 10 Arab World NA
Research Question 2
library(ggplot2)
ggplot(energy_data, aes(x = renewable_energy, y = renewable_electricity)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", color = "blue") +
labs(title = "Relationship between Renewable Energy and COâ‚‚ Emissions")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 727 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 727 rows containing missing values or values outside the scale range
## (`geom_point()`).

Research Question 3
region_summary <- energy_data %>%
group_by(country) %>%
summarise(
avg_renewable = mean(renewable_energy, na.rm = TRUE),
avg_gdp = mean(gdp, na.rm = TRUE)
)
ggplot(region_summary, aes(x = avg_gdp, y = avg_renewable, label = country)) +
geom_point(size = 3, color = "darkgreen") +
geom_text(nudge_y = 2) +
labs(
title = "Renewable Energy vs GDP by Region (Avg 2013–2023)",
x = "Average GDP (US$)",
y = "Average Renewable Energy (%)"
)
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_text()`).
