Question 1

data(cars)
head(cars)
##   speed dist
## 1     4    2
## 2     4   10
## 3     7    4
## 4     7   22
## 5     8   16
## 6     9   10
median(cars[, 1])
## [1] 15

Question 2

library(jsonlite)

form API URL

url <- "https://min-api.cryptocompare.com/data/v2/histoday?fsym=BTC&tsym=USD&limit=100"
btc_data <- fromJSON(url)
btc_df <- btc_data$Data$Data
head(btc_df)
##         time     high      low     open volumefrom   volumeto    close
## 1 1752019200 112077.2 108341.9 108955.1   19761.34 2177951433 111291.7
## 2 1752105600 116848.3 110555.5 111291.7   31905.27 3616719328 116023.3
## 3 1752192000 118890.3 115236.7 116023.3   26050.27 3060911543 117573.9
## 4 1752278400 118240.0 116954.1 117573.9    7395.67  869899617 117468.2
## 5 1752364800 119503.6 117264.6 117468.2    9553.52 1133368268 119127.7
## 6 1752451200 123220.3 118951.6 119127.7   31361.70 3784774921 119869.0
##   conversionType conversionSymbol
## 1         direct                 
## 2         direct                 
## 3         direct                 
## 4         direct                 
## 5         direct                 
## 6         direct
max_close <- max(btc_df$close, na.rm = TRUE)
max_close
## [1] 124723

Question 3

Research Questions:

1. How have renewable energy investments changed across different countries over the past 10 years?

2. Is there a measurable relationship between renewable energy consumption and COâ‚‚ emissions?

3. Which regions are leading in renewable energy development, and how does this correlate with their GDP?

Data Extraction:

library(WDI)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
indicators <- c(
  "renewable_energy" = "EG.FEC.RNEW.ZS",
  "co2_emissions" = "EN.ATM.CO2E.PC",
  "gdp" = "NY.GDP.MKTP.CD",
  "renewable_electricity" = "EG.ELC.RNEW.ZS"
)

energy_data <- WDI(country = "all", indicator = indicators, start = 2013, end = 2023)
## Warning in WDI(country = "all", indicator = indicators, start = 2013, end = 2023): The following indicators could not be downloaded: EN.ATM.CO2E.PC.
## 
## Please make sure that you are running the latest version of the `WDI` package, and that the arguments you are using in the `WDI()` function are valid.
## 
## Sometimes, downloads will suddenly stop working, even if nothing has changed in the R code of the WDI package. ("The same WDI package version worked yesterday!") In those cases, the problem is almost certainly related to the World Bank servers or to your internet connection.
## 
## You can check if the World Bank web API is currently serving the indicator(s) of interest by typing a URL of this form in your web browser:
## 
## https://api.worldbank.org/v2/en/country/all/indicator/EN.ATM.CO2E.PC?format=json&date=:&per_page=32500&page=1
head(energy_data)
##       country iso2c iso3c year renewable_energy         gdp
## 1 Afghanistan    AF   AFG 2013             16.9 20146416758
## 2 Afghanistan    AF   AFG 2014             19.1 20497128556
## 3 Afghanistan    AF   AFG 2015             17.7 19134221645
## 4 Afghanistan    AF   AFG 2016             20.2 18116572395
## 5 Afghanistan    AF   AFG 2017             19.5 18753456498
## 6 Afghanistan    AF   AFG 2018             18.3 18053222687
##   renewable_electricity
## 1              81.00864
## 2              85.46147
## 3              87.57824
## 4              87.76127
## 5              86.86458
## 6              82.67200

Data Cleaning

check for missing values

sum(is.na(energy_data))
## [1] 1320
apply(is.na(energy_data), 2, sum)
##               country                 iso2c                 iso3c 
##                     0                     0                     0 
##                  year      renewable_energy                   gdp 
##                     0                   563                    96 
## renewable_electricity 
##                   661
library(stringr)
str_detect(energy_data,"NA")
## Warning in stri_detect_regex(string, pattern, negate = negate, opts_regex =
## opts(pattern)): argument is not an atomic vector; coercing
## [1] FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE

Fill in for missing values

ED_complete <-energy_data[-which(is.na(energy_data$sources)),]
str(ED_complete)
## 'data.frame':    0 obs. of  7 variables:
##  $ country              : chr 
##  $ iso2c                : chr 
##  $ iso3c                : chr 
##  $ year                 : int 
##  $ renewable_energy     : num 
##  $ gdp                  : num 
##  $ renewable_electricity: num
ED_complete1 <- energy_data[complete.cases(energy_data),]
str(ED_complete1)
## 'data.frame':    2152 obs. of  7 variables:
##  $ country              : chr  "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
##  $ iso2c                : chr  "AF" "AF" "AF" "AF" ...
##  $ iso3c                : chr  "AFG" "AFG" "AFG" "AFG" ...
##  $ year                 : int  2013 2014 2015 2016 2017 2018 2019 2020 2021 2013 ...
##  $ renewable_energy     : num  16.9 19.1 17.7 20.2 19.5 ...
##  $ gdp                  : num  2.01e+10 2.05e+10 1.91e+10 1.81e+10 1.88e+10 ...
##  $ renewable_electricity: num  81 85.5 87.6 87.8 86.9 ...
ED_complete2 <- na.omit(energy_data)
str(ED_complete2)
## 'data.frame':    2152 obs. of  7 variables:
##  $ country              : chr  "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
##  $ iso2c                : chr  "AF" "AF" "AF" "AF" ...
##  $ iso3c                : chr  "AFG" "AFG" "AFG" "AFG" ...
##  $ year                 : int  2013 2014 2015 2016 2017 2018 2019 2020 2021 2013 ...
##  $ renewable_energy     : num  16.9 19.1 17.7 20.2 19.5 ...
##  $ gdp                  : num  2.01e+10 2.05e+10 1.91e+10 1.81e+10 1.88e+10 ...
##  $ renewable_electricity: num  81 85.5 87.6 87.8 86.9 ...
##  - attr(*, "na.action")= 'omit' Named int [1:774] 10 11 20 21 22 31 32 33 34 35 ...
##   ..- attr(*, "names")= chr [1:774] "10" "11" "20" "21" ...

Research Question 1

energy_trend <- energy_data %>%
  group_by(country) %>%
  summarise(change = last(renewable_energy) - first(renewable_energy))


energy_trend %>%
  arrange(desc(change)) %>%
  slice(1:10)
## # A tibble: 10 × 2
##    country                     change
##    <chr>                        <dbl>
##  1 Afghanistan                     NA
##  2 Africa Eastern and Southern     NA
##  3 Africa Western and Central      NA
##  4 Albania                         NA
##  5 Algeria                         NA
##  6 American Samoa                  NA
##  7 Andorra                         NA
##  8 Angola                          NA
##  9 Antigua and Barbuda             NA
## 10 Arab World                      NA

Research Question 2

library(ggplot2)
ggplot(energy_data, aes(x = renewable_energy, y = renewable_electricity)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", color = "blue") +
  labs(title = "Relationship between Renewable Energy and COâ‚‚ Emissions")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 727 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 727 rows containing missing values or values outside the scale range
## (`geom_point()`).

Research Question 3

region_summary <- energy_data %>%
  group_by(country) %>%
  summarise(
    avg_renewable = mean(renewable_energy, na.rm = TRUE),
    avg_gdp = mean(gdp, na.rm = TRUE)
  )
ggplot(region_summary, aes(x = avg_gdp, y = avg_renewable, label = country)) +
  geom_point(size = 3, color = "darkgreen") +
  geom_text(nudge_y = 2) +
  labs(
    title = "Renewable Energy vs GDP by Region (Avg 2013–2023)",
    x = "Average GDP (US$)",
    y = "Average Renewable Energy (%)"
  )
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_text()`).